% Conference and workshop name macros.  Note: these strings are substituted
% verbatim into booktitle fields (standard styles do not recase booktitles),
% so they must carry correct capitalization and LaTeX-escaped specials (\&).
@string{AAAI =   "Conference on Artificial Intelligence (AAAI)"}
@string{ICDE =   "International Conference on Data Engineering (ICDE)"}
@string{PPDP =   "International Symposium on Principles and Practice of Declarative Programming"}
@string{HOTNETS= "ACM Workshop on Hot Topics in Networks (HotNets)"}
@string{DISC=    "International Symposium on Distributed Computing (DISC)"}
@string{NSDI=    "USENIX Symposium on Networked Systems Design \& Implementation (NSDI)"}
@string{CCR =    "SIGCOMM Computer Communication Review (CCR)"}
@string{CCS =    "ACM Conference on Computer and Communications Security (CCS)"}
@string{SAT =    "International Conference on Theory and Applications of Satisfiability Testing (SAT)"}
@string{WWW =    "International Conference on World Wide Web (WWW)"}
@string{SIGMETRICS = "International Conference on Measurement \& Modeling of Computer Systems (SIGMETRICS)"}
@string{ISPASS= "IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)"}
@string{LICS=   "IEEE Symposium on Logic In Computer Science (LICS)"}
@string{FAST=   "USENIX Conference on File and Storage Technologies (FAST)"}
@string{WDDD=   "Workshop on Duplicating, Deconstructing and Debunking (WDDD)"}
@string{PODS=   "Symposium on Principles of Database Systems (PODS)"}
@string{WASP=   "Workshop on Application-Specific Processors (WASP)"}
@string{OOPSLA=	"Object Oriented Programming: Systems, Languages, and Applications (OOPSLA)"}
@string{POPL=	"ACM Symposium on Principles of Programming Languages (POPL)"}
@string{PLDI=	"ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI)"}
@string{ISCA=	"International Symposium on Computer Architecture (ISCA)"}
@string{DAC=	"Design Automation Conference (DAC)"}
@string{DATE=   "Design, Automation and Test in Europe (DATE)"}
@string{RAW=    "Reconfigurable Architectures Workshop (RAW)"}
@string{ASPLOS= "International Conference on Architectural Support for Programming Languages and
                  Operating Systems (ASPLOS)"}
@string{FCCM=   "IEEE Symposium on Field-Programmable Custom Computing Machines (FCCM)"}
@string{FPGA=   "ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA)"}
@string{CAV=    "Computer-Aided Verification (CAV)"}
@string{CODES=  "International Symposium on Hardware/Software Codesign (CODES)"}
@string{FPL=    "International Conference on Field Programmable Logic and Applications (FPL)"}
@string{EUROPAR="European Conference on Parallel Processing (EUROPAR)"}
@string{CGO=    "International ACM/IEEE Symposium on Code Generation and Optimization (CGO)"}
@string{PPOPP=  "ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP)"}
@string{CC=     "International Conference on Compiler Construction (CC)"}
@string{PACT=   "International Conference on Parallel Architectures and Compilation Techniques (PACT)"}
@string{LCPC=   "Workshop on Languages and Compilers for Parallel Computing (LCPC)"}
@string{ICCAD=  "IEEE/ACM International Conference on Computer-Aided Design (ICCAD)"}
@string{MICRO=  "IEEE/ACM International Symposium on Microarchitecture (MICRO)"}
@string{SAS=    "International Static Analysis Symposium (SAS)"}
@string{TACAS=  "International Conference on Tools and Algorithms for the
                 Construction and Analysis of Systems (TACAS)"}
@string{CASES=  "International Conference on Compilers, Architecture,
                 and Synthesis for Embedded Systems (CASES)"}
@string{HPCA=   "International Symposium on High-Performance Computer Architecture (HPCA)"}
@string{ASYNC=  "International Symposium on Advanced Research in Asynchronous Circuits and Systems (ASYNC)"}
@string{ICS=    "{ACM} International Conference on Supercomputing (ICS)"}
@string{ICCD=   "International Conference on Computer Design (ICCD)"}
@string{SIGGRAPH="Annual Conference on Computer Graphics and Interactive Techniques (SIGGRAPH)"}
@string{SOSP=   "ACM Symposium on Operating Systems Principles (SOSP)"}
@string{FPT=    "IEEE International Conference on Field-Programmable Technology (FPT)"}
@string{SPAA=   "ACM Symposium on Parallel Algorithms and Architectures (SPAA)"}
@string{OSDI=   "Symposium on Operating Systems Design and Implementation (OSDI)"}
@string{ISLPED= "International Symposium on Low Power Electronics and Design (ISLPED)"}
@string{IWLS=   "International Workshop on Logic Synthesis (IWLS)"}
@string{VLSI=   "International Conference on VLSI Design (VLSI)"}
@string{ISCAS=  "IEEE International Symposium on Circuits and Systems (ISCAS)"}
@string{USENIX= "USENIX Annual Technical Conference (USENIX)"}
@string{ISSCC=  "IEEE International Solid-State Circuits Conference (ISSCC)"}
@string{LCTES=  "ACM SIGPLAN/SIGBED Conference on Languages, Compilers, and Tools for Embedded Systems (LCTES)"}
@string{ICFP=   "International Conference on Functional Programming (ICFP)"}
@string{NDSS=   "Network and Distributed System Security Symposium (NDSS)"}
@string{USS=    "USENIX Security Symposium (USS)"}
@string{ISSP=   "IEEE Symposium on Security and Privacy (ISSP)"}
@string{SIGMOD= "ACM SIGMOD International Conference on Management of Data (SIGMOD)"}
@string{VLDB=   "International Conference on Very Large Data Bases (VLDB)"}
@string{CIDR =  "Conference on Innovative Data Systems Research (CIDR)"}

%Journals
@string{JPDC=	"Journal of Parallel and Distributed Computing (JPDC)"}
@string{CACM=	"Communications of the ACM (CACM)"}
@string{JACM=	"Journal of the ACM (JACM)"}
% NOTE(review): TCS is kept as a legacy alias of TOCS (both render identically);
% confirm no entry intended "Theoretical Computer Science" before removing.
@string{TCS=    "Transactions on Computer Systems (TOCS)"}
@string{TOCS=   "Transactions on Computer Systems (TOCS)"}
@string{TOC=    "IEEE Transactions on Computers (TOC)"}
@string{TOPLAS= "ACM Transactions on Programming Languages and Systems (TOPLAS)"}
@string{JSSC=   "IEEE Journal of Solid-State Circuits (JSSC)"}
%Others
@string{LNCS=	"Lecture Notes in Computer Science (LNCS)"}
@string{springer="Springer-Verlag"}

@article{green-tcs11,
  title =	 {Reconcilable Differences},
  author =	 {Green, Todd J. and Ives, Zachary G. and Tannen, Val},
  journal =	 {Theory of Computing Systems},
  volume =	 49,
  number =	 2,
  pages =	 {460--488},
  year =	 2011,
  publisher =	 {Springer},
  url =          {https://web.cs.ucdavis.edu/~green/papers/tocs11_differences.pdf},
  abstract =	 {In this paper we study a problem motivated by the
                  management of changes in databases. It turns out
                  that several such change scenarios, e.g., the
                  separately studied problems of view maintenance
                  (propagation of data changes) and view adaptation
                  (propagation of view definition changes) can be
                  unified as instances of query reformulation using
                  views provided that support for the relational
                  difference operator exists in the context of query
                  reformulation. Exact query reformulation using views
                  in positive relational languages is well understood,
                  and has a variety of applications in query
                  optimization and data sharing. Unfortunately, most
                  questions about queries become undecidable in the
                  presence of difference (or negation), whether we use
                  the foundational set semantics or the more practical
                  bag semantics.  We present a new way of managing
                  this difficulty by defining a novel semantics,
                  Z-relations, where tuples are annotated with
                  positive or negative integers. Z-relations
                  conveniently represent data, insertions, and
                  deletions in a uniform way, and can apply deletions
                  with the union operator (deletions are tuples with
                  negative counts). We show that under Z-semantics
                  relational algebra (RA) queries have a normal form
                  consisting of a single difference of positive
                  queries, and this leads to the decidability of their
                  equivalence. We provide a sound and complete
                  algorithm for reformulating RA queries, including
                  queries with difference, over
                  Z-relations. Additionally, we show how to support
                  standard view maintenance and view adaptation over
                  set or bag semantics, through an excursion into the
                  Z-semantics setting. Our algorithm turns out to be
                  sound and complete also for bag semantics, albeit
                  necessarily only for a subclass of RA. This subclass
                  turns out to be quite large and covers generously
                  the applications of interest to us. We also show a
                  subclass of RA where reformulation and evaluation
                  under Z-semantics can be combined with duplicate
                  elimination to obtain the answer under set
                  semantics. We investigate related complexity
                  questions, and we also extend our results to queries
                  with built-in predicates.},
  comments =	 {Develops an algebra of multisets using Z and proves
                  some interesting completeness theorems.  Lemma 6.3 is
                  crucial for DDlog.  Proposition 6.13 is about distinct.}
}

@inproceedings{green-pods07,
  author =	 {Green, Todd J. and Karvounarakis, Grigoris and
                  Tannen, Val},
  title =	 {Provenance Semirings},
  year =	 2007,
  doi =		 {10.1145/1265530.1265535},
  url =		 {https://doi.org/10.1145/1265530.1265535},
  abstract =	 {We show that relational algebra calculations for
                  incomplete databases, probabilistic databases, bag
                  semantics and why-provenance are particular cases of
                  the same general algorithms involving
                  semirings. This further suggests a comprehensive
                  provenance representation that uses semirings of
                  polynomials. We extend these considerations to
                  datalog and semirings of formal power series. We
                  give algorithms for datalog provenance calculation
                  as well as datalog evaluation for incomplete and
                  probabilistic databases. Finally, we show that for
                  some semirings containment of conjunctive queries is
                  the same as for standard set semantics.},
  booktitle =	 PODS,
  pages =	 {31--40},
  numpages =	 10,
  keywords =	 {incomplete databases, probabilistic databases,
                  datalog, formal power series, data lineage,
                  semirings, data provenance},
  location =	 {Beijing, China},
}

@InProceedings{abadi-fossacs15,
  author =	 {Mart{\'\i}n Abadi and Frank McSherry and Gordon Plotkin},
  title =	 {Foundations of Differential Dataflow},
  booktitle =	 {Foundations of Software Science and Computation
                  Structures (FoSSaCS)},
  year =	 2015,
  url =          {http://homepages.inf.ed.ac.uk/gdp/publications/differentialweb.pdf},
  abstract =	 {Differential dataflow is a recent approach to
                  incremental computation that relies on a partially
                  ordered set of differences. In the present paper, we
                  aim to develop its foundations. We define a small
                  programming language whose types are abelian groups
                  equipped with linear inverses, and provide both a
                  standard and a differential denotational
                  semantics. The two semantics coincide in that the
                  differential semantics is the differential of the
                  standard one. M{\"o}bius inversion, a well-known
                  idea from combinatorics, permits a systematic
                  treatment of various operators and constructs.},
  comments =	 {A formal justification of the implementation of
                  Differential Dataflow}
}

@inproceedings{murray-sosp13,
  author =       {Murray, Derek G. and McSherry, Frank and Isaacs,
                  Rebecca and Isard, Michael and Barham, Paul and
                  Abadi, Mart\'{\i}n},
  title =        {{Naiad}: A Timely Dataflow System},
  year =         2013,
  url =          {https://doi.org/10.1145/2517349.2522738},
  doi =          {10.1145/2517349.2522738},
  abstract =     {Naiad is a distributed system for executing data
                  parallel, cyclic dataflow programs. It offers the
                  high throughput of batch processors, the low latency
                  of stream processors, and the ability to perform
                  iterative and incremental computations. Although
                  existing systems offer some of these features,
                  applications that require all three have relied on
                  multiple platforms, at the expense of efficiency,
                  maintainability, and simplicity. Naiad resolves the
                  complexities of combining these features in one
                  framework. A new computational model, timely
                  dataflow, underlies Naiad and captures opportunities
                  for parallelism across a wide class of
                  algorithms. This model enriches dataflow computation
                  with timestamps that represent logical points in the
                  computation and provide the basis for an efficient,
                  lightweight coordination mechanism. We show that many
                  powerful high-level programming models can be built
                  on Naiad's low-level primitives, enabling such
                  diverse tasks as streaming data analysis, iterative
                  machine learning, and interactive graph
                  mining. Naiad outperforms specialized systems in
                  their target application domains, and its unique
                  features enable the development of new
                  high-performance applications.},
  booktitle =    SOSP,
  pages =        {439--455},
  numpages =     17,
  location =     {Farminton, Pennsylvania},
}

@inproceedings{mcsherry-cidr13,
  author =       {Frank McSherry and Derek Gordon Murray and Rebecca
                  Isaacs and Michael Isard},
  title =        {Differential Dataflow},
  booktitle =    CIDR,
  year =         2013,
  month =        {January 6--9},
  address =      {Asilomar, CA},
  url =          {http://cidrdb.org/cidr2013/Papers/CIDR13\_Paper111.pdf},
  abstract =     {Existing computational models for processing
                  continuously changing input data are unable to
                  efficiently support iterative queries except in
                  limited special cases. This makes it difficult to
                  perform complex tasks, such as social-graph analysis
                  on changing data at interactive timescales, which
                  would greatly benefit those analyzing the behavior
                  of services like Twitter. In this paper we introduce
                  a new model called differential computation, which
                  extends traditional incremental computation to
                  allow arbitrarily nested iteration, and explain---with
                  reference to a publicly available prototype system
                  called Naiad---how differential computation can be
                  efficiently implemented in the context of a
                  declarative data-parallel dataflow language. The
                  resulting system makes it easy to program previously
                  intractable algorithms such as incrementally updated
                  strongly connected components, and integrate them
                  with data transformation operations to obtain
                  practically relevant insights from real data
                  streams.}
}

@inproceedings{alvaro-datalog11,
  author =       {Alvaro, Peter and Marczak, William R.  and Conway,
                  Neil and Hellerstein, Joseph M.  and Maier, David
                  and Sears, Russell},
  title =        {{Dedalus}: {Datalog} in Time and Space},
  booktitle =    {Datalog Reloaded},
  year =         2011,
  publisher =    {Springer Berlin Heidelberg},
  address =      {Berlin, Heidelberg},
  pages =        {262--281},
  abstract =     {Recent research has explored using Datalog-based
                  languages to express a distributed system as a set
                  of logical invariants. Two properties of distributed
                  systems proved difficult to model in Datalog. First,
                  the state of any such system evolves with its
                  execution. Second, deductions in these systems may
                  be arbitrarily delayed, dropped, or reordered by the
                  unreliable network links they must
                  traverse. Previous efforts addressed the former by
                  extending Datalog to include updates, key
                  constraints, persistence and events, and the latter
                  by assuming ordered and reliable delivery while
                  ignoring delay. These details have a semantics
                  outside Datalog, which increases the complexity of
                  the language and its interpretation, and forces
                  programmers to think operationally. We argue that
                  the missing component from these previous languages
                  is a notion of time.  In this paper we present
                  Dedalus, a foundation language for programming and
                  reasoning about distributed systems. Dedalus reduces
                  to a subset of Datalog with negation, aggregate
                  functions, successor and choice, and adds an
                  explicit notion of logical time to the language. We
                  show that Dedalus provides a declarative foundation
                  for the two signature features of distributed
                  systems: mutable state, and asynchronous processing
                  and communication. Given these two features, we
                  address two important properties of programs in a
                  domain-specific manner: a notion of safety
                  appropriate to non-terminating computations, and
                  stratified monotonic reasoning with negation over
                  time. We also provide conservative syntactic checks
                  for our temporal notions of safety and
                  stratification. Our experience implementing
                  full-featured systems in variants of Datalog
                  suggests that Dedalus is well-suited to the
                  specification of rich distributed services and
                  protocols, and provides both cleaner semantics and
                  richer tests of correctness.},
  url =          {http://db.cs.berkeley.edu/papers/datalog2011-dedalus.pdf},
  comments =     {Declarative programming for distributed systems.
                  Successor relation models time.  Time is an explicit
                  attribute of a relation.  Deductive rule = same
                  time, inductive rule = successor time.  Syntactic
                  sugar for these, using @next attached to a head to
                  indicate inductive rule.  Allow "timestamped" facts.
                  Cannot join across different times.  Persistent
                  rules are valid at all times.  Mutable state: facts
                  are removed in time.  Add counters, queues.
                  Deductively stratifiable => has a perfect model.
                  There are no "cycles" in time.  Some programs are
                  quiescent: after some time they derive no new facts.
                  Add a non-deterministic "choice" construct.  Also
                  add "location" like Loo's work.  Add "async" to
                  indicate that something happens at an unspecified
                  time (possibly never).  Nice collection of related
                  work with dialects of Datalog.  Discusses Overlog
                  execution model using a "chain of fixedpoints"; but
                  Overlog semantics is not well-specified.}
}

@InProceedings{mamouras-esop20,
  author =	 {Mamouras, Konstantinos},
  editor =	 {M{\"u}ller, Peter},
  title =	 {Semantic Foundations for Deterministic Dataflow and
                  Stream Processing},
  booktitle =	 {European Symposium on Programming},
  year =	 2020,
  address =	 {Dublin, Ireland},
  month =	 {April 25--30},
  pages =	 {394--427},
  abstract =	 {We propose a denotational semantic framework for
                  deterministic dataflow and stream processing that
                  encompasses a variety of existing streaming
                  models. Our proposal is based on the idea that data
                  streams, stream transformations, and
                  stream-processing programs should be classified
                  using types. The type of a data stream is captured
                  formally by a monoid, an algebraic structure with a
                  distinguished binary operation and a unit. The
                  elements of a monoid model the finite fragments of a
                  stream, the binary operation represents the
                  concatenation of stream fragments, and the unit is
                  the empty fragment. Stream transformations are
                  modeled using monotone functions on streams, which
                  we call stream transductions. These functions can be
                  implemented using abstract machines with a
                  potentially infinite state space, which we call
                  stream transducers. This abstract typed framework of
                  stream transductions and transducers can be used to
                  (1) verify the correctness of streaming
                  computations, that is, that an implementation
                  adheres to the desired behavior, (2) prove the
                  soundness of optimizing transformations, e.g. for
                  parallelization and distribution, and (3) inform the
                  design of programming models and query languages for
                  stream processing. In particular, we show that
                  several useful combinators can be supported by the
                  full class of stream transductions and transducers:
                  serial composition, parallel composition, and
                  feedback composition.},
}

@article{gammie-acs13,
  author =	 {Gammie, Peter},
  title =	 {Synchronous Digital Circuits as Functional Programs},
  year =	 2013,
  issue_date =	 {November 2013},
  volume =	 46,
  number =	 2,
  issn =	 {0360-0300},
  doi =		 {10.1145/2543581.2543588},
  url =		 {https://doi.org/10.1145/2543581.2543588},
  abstract =	 {Functional programming techniques have been used to
                  describe synchronous digital circuits since the
                  early 1980s. Here we survey the systems and formal
                  underpinnings that constitute this tradition. We
                  situate these techniques with respect to other
                  formal methods for hardware design and discuss the
                  work yet to be done.},
  journal =	 {ACM Computing Surveys},
  month =	 nov,
  articleno =	 21,
  numpages =	 27
}

@PhdThesis{johnson-phd83,
  author =	 {Johnson, Steven Dexter},
  title =	 {Synthesis of Digital Designs from Recursion
                  Equations},
  school =	 {Indiana University},
  year =	 1983,
  month =	 may,
  url =          {https://help.luddy.indiana.edu/techreports/TRNNN.cgi?trnum=TR141},
  abstract =	 {The discipline of applicative program design style
                  is adapted to the design of digital (sometimes
                  called synchronous) systems. The result is a
                  powerful and natural methodology for engineering
                  correct hardware implementations. This dissertation
                  presents a method to develop digital/synchronous
                  system descriptions from recursive specifications;
                  offers a prototype general purpose modeling language
                  that supports this design task; and makes a formal
                  connection between functional recursion and
                  component connectivity that is pleasantly direct,
                  suggesting that applicative notation is the
                  appropriate basis for digital design.  Design is a
                  translation of notation from an abstractly
                  descriptive specification to a concretely
                  descriptive realization. Recursion equations are
                  used as a specification language. The realization
                  language is another form of recursion in which
                  variables denote sequences (rather than functions)
                  that represent digital component behavior.
                  Self-reference in realizations corresponds to
                  feedback in a physical implementation.  Synthesis is
                  a method for constructing realizations that are
                  guaranteed to meet their specifications. It is a
                  synonym of ``engineering'' peculiar to computer
                  science, where the concern is not only with methods
                  but also with their automation. This term suggests a
                  factor of human guidance, as opposed to compilation
                  which does not. Realizations can, however, be
                  compiled from iterative specifications. Even for the
                  case of non-iterative specifications, synthesis of
                  an iterative version is the primary tactic
                  here. This tactic formalizes the conventional
                  digital design technique of decomposing a circuit
                  into an architecture and a finite state controller.
                  The formal setting for a discussion of this topic is
                  the calculus of Scott and Strachey. A specification
                  denotes the fixed point of a functional; a
                  realization denotes a fixed point in a domain of
                  sequences. This approach to synthesis, then, is yet
                  another application of modeling ``function recursion''
                  with ``data recursion'', or reflexivity. An
                  interpreter has been implemented for Daisy, a
                  dialect of the Scott-Strachey notation.  Any
                  description expressed in Daisy can be directly
                  executed at successive steps in its evolution. Thus,
                  the notation that serves as the medium of
                  engineering serves also as a vehicle for
                  experimentation. This is important to the practice
                  of design because the engineer can explore some
                  aspects of performance without expensive
                  constructions of hardware prototypes, or risky
                  recodings in a simulation language.  Two examples
                  follow. A non-trivial exercise in language-driven
                  design, derivation of a controller for an
                  applicative language interpreter, reveals that
                  powerful global structuring techniques, such as
                  hierarchical decomposition and data abstraction, are
                  inherited immediately from the functional style of
                  description. Executability of the current description
                  at each stage of the derivation provides a model for
                  testing representation decisions and trivial
                  modifications. Next, a specialized algebra is
                  developed to address a typical local refinement
                  problem: reducing external connections by means of
                  serialization.  Thus, local as well as global design
                  problems yield to the applicative method.
                  Applicative notation is especially suitable for
                  digital circuit description because the basic
                  algebra is the same in both realms. Even though the
                  underlying symbols are interpreted differently
                  (i.e. operations vs. components; values vs. signals)
                  the manner of combining them (e.g.
                  composition/construction vs. serial/parallel wiring)
                  is identical. Hence, recursion equations, McCarthy's
                  mathematical basis for the science of computation,
                  is fitting for hardware design because it so well
                  reflects the physical basis of computation: digital
                  electronics.}
}

@Article{leiserson-algorithmica91,
  author =	 {Charles E. Leiserson and James B. Saxe},
  title =	 {Retiming Synchronous Circuitry},
  journal =	 {Algorithmica},
  year =	 1991,
  volume =	 6,
  pages =	 {5--35},
  abstract =	 {This paper describes a circuit transformation called
                  retiming in which registers are added at some
                  points in a circuit and removed from others in such
                  a way that the functional behavior of the circuit as
                  a whole is preserved. We show that retiming can be
                  used to transform a given synchronous circuit into a
                  more efficient circuit under a variety of different
                  cost criteria. We model a circuit as a graph in
                  which the vertex set V is a collection of
                  combinational logic elements and the edge set E is
                  the set of interconnections, each of which may pass
                  through zero or more registers. We give an $O(|V|
                  |E| \lg |V|)$ algorithm for determining an equivalent
                  retimed circuit with the smallest possible clock
                  period. We show that the problem of determining an
                  equivalent retimed circuit with minimum state (total
                  number of registers) is polynomial-time
                  solvable. This result yields a polynomial-time
                  optimal solution to the problem of pipelining
                  combinational circuitry with minimum register
                  cost. We also give a characterization of optimal
                  retiming based on an efficiently solvable
                  mixed-integer linear-programming problem.},
  doi =          {10.1007/BF01759032},
  url =          {http://www.cs.columbia.edu/~CS6861/handouts/leiserson-algorithmica-88.pdf}
}

@inproceedings{koch-pods10,
  author =	 {Koch, Christoph},
  title =	 {Incremental Query Evaluation in a Ring of Databases},
  year =	 2010,
  doi =		 {10.1145/1807085.1807100},
  url =		 {https://doi.org/10.1145/1807085.1807100},
  abstract =	 {This paper approaches the incremental view
                  maintenance problem from an algebraic
                  perspective. We construct the algebraic structure of
                  a ring of databases and use it as the foundation of
                  the design of a query calculus that allows to
                  express powerful aggregate queries. The query
                  calculus inherits key properties of the ring, such
                  as having a normal form of polynomials and being
                  closed under computing inverses and delta
                  queries. The k-th delta of a polynomial query of
                  degree k without nesting is purely a function of the
                  update, not of the database. This gives rise to a
                  method of eliminating expensive query operators such
                  as joins from programs that perform incremental view
                  maintenance. The main result is that, for non-nested
                  queries, each individual aggregate value can be
                  incrementally maintained using a constant amount of
                  work. This is not possible for nonincremental
                  evaluation.},
  booktitle =	 PODS,
  pages =	 {87--98},
  numpages =	 12,
  keywords =	 {incremental view maintenance, algebra},
  location =	 {Indianapolis, Indiana, USA}
}

@inproceedings{ryzhyk-datalog19,
   author =	{Ryzhyk, Leonid and Budiu, Mihai},
   title =	{Differential {Datalog}},
   booktitle =	{Datalog 2.0},
   address =	{Philadelphia, PA},
   month =	jun,
   year =	{2019},
   abstract =	{Many real-world applications based on deductive databases require
                 incrementally updating output relations (tables) in response to changes
                 to input relations. To make such applications easier to implement we
                 have created Differential Datalog (DDlog), a dialect of Datalog that
                 automates incremental computation. A DDlog programmer writes
                 traditional, non-incremental Datalog programs. The execution model of
                 DDlog is however fully incremental: at runtime DDlog programs receive
                 streams of changes to the input relations (insertions or deletions) and
                 produce streams of corresponding changes to derived relations. The
                 DDlog compiler translates DDlog programs to Differential Dataflow (DD)
                 programs; DD provides an incremental execution engine supporting all
                 the relational operators, including fixed-point. The DDlog runtime
                 automatically maintains the indexes required to efficiently compute
                 output updates. \par The DDlog language is targeted for system
                 builders. In consequence, the language emphasizes usability, by
                 providing a rich type system, a powerful expression language, a module
                 system, including string manipulation, arithmetic, and integration with
                 C, Rust, and Java. The code is open-source, available using an MIT
                 permissive license.},
   url =	{http://budiu.info/work/ddlog.pdf},
   confweb =	{https://sites.sju.edu/plw/datalog2/}
}

@inproceedings{lungeanu-date00,
  author =	 {Lungeanu, Dragos and Shi, C.-J. Richard},
  title =	 {Parallel and Distributed {VHDL} Simulation},
  year =	 2000,
  doi =		 {10.1145/343647.343884},
  booktitle =	 DATE,
  pages =	 {658--662},
  numpages =	 5,
  location =	 {Paris, France},
  series =	 {DATE '00},
  abstract =	 {This paper presents a methodology for parallel and
                  distributed simulation of VHDL using the PDES
                  (parallel discrete-event simulation) paradigm. To
                  achieve better features and performance, some PDES
                  protocols assume that simultaneous events may be
                  processed in arbitrary order.  We describe a
                  solution of how to apply these algorithms to have a
                  correct simulation of the distributed VHDL cycle,
                  including the delta cycle. The solution is based on
                  tie-breaking the simultaneous events using
                  Lamport's logical clocks to causally order them
                  according to the VHDL simulation cycle,
                  and defining the VHDL virtual time as a pair of
                  simulation physical time and cycle/phase logical
                  time. The paper also shows how to use this method
                  with a PDES protocol that relaxes the simulation of
                  simultaneous events to arbitrary order, allowing the
                  LPs to self-adapt to optimistic or conservative
                  mode, without the lookahead requirement.  The
                  lookahead is application-dependent and for some
                  systems may be zero or unknown. The parallel
                  simulation of VHDL designs ranging from 5531 to
                  14704 LPs using these methods obtained a promising,
                  almost linear speedup.},
  comments =     {Good survey of simulation methodologies for VHDL.}
}

@inproceedings{baker-date96,
  author =	 {Baker, W. and Newton, A.},
  title =	 {The Maximal {VHDL} Subset with a Cycle-Level
                  Abstraction},
  year =	 1996,
  booktitle =	 {Proceedings of the Conference on European Design
                  Automation (DATE)},
  pages =	 {470--475},
  numpages =	 6,
  location =	 {Geneva, Switzerland},
  abstract =	 {The maximal VHDL subset with a cycle-level
                  abstraction is defined. This subset requires that
                  the description have three semantic properties:
                  responsiveness, modularity and causality, but full
                  VHDL is neither modular nor causal. Synchronous VHDL
                  is the responsive, modular and causal subset of
                  VHDL. The compiler uses modularity-checking and
                  causality-checking to identify admissible programs.}
}

@inproceedings{koch-pods16,
  author =	 {Koch, Christoph and Lupei, Daniel and Tannen, Val},
  title =	 {Incremental View Maintenance For Collection
                  Programming},
  year =	 2016,
  doi =		 {10.1145/2902251.2902286},
  abstract =	 {In the context of incremental view maintenance
                  (IVM), delta query derivation is an essential
                  technique for speeding up the processing of large,
                  dynamic datasets. The goal is to generate delta
                  queries that, given a small change in the input, can
                  update the materialized view more efficiently than
                  via recomputation.In this work we propose the first
                  solution for the efficient incrementalization of
                  positive nested relational calculus (NRC+) on bags
                  (with integer multiplicities). More precisely, we
                  model the cost of NRC+ operators and classify
                  queries as efficiently incrementalizable if their
                  delta has a strictly lower cost than full
                  re-evaluation. Then, we identify NRC+, a large
                  fragment of NRC+ that is efficiently
                  incrementalizable and we provide a
                  semantics-preserving translation that takes any NRC+
                  query to a collection of IncNRC+
                  queries. Furthermore, we prove that incremental
                  maintenance for NRC+ is within the complexity class
                  NC0 and we showcase how recursive IVM, a technique
                  that has provided significant speedups over
                  traditional IVM in the case of flat queries [25],
                  can also be applied to IncNRC+.},
  booktitle =	 PODS,
  pages =	 {75--90},
  numpages =	 16,
  keywords =	 {recursive ivm, nested relational calculus,
                  higher-order delta derivation, shredding
                  transformation, collection programming, incremental
                  view maintenance, incremental computation, nested
                  relational algebra},
  location =	 {San Francisco, California, USA}
}

@article{ahmad-vldb09,
  author =	 {Ahmad, Yanif and Koch, Christoph},
  title =	 {{DBToaster}: A {SQL} Compiler for High-Performance
                  Delta Processing in Main-Memory Databases},
  year =	 2009,
  volume =	 2,
  number =	 2,
  issn =	 {2150-8097},
  doi =		 {10.14778/1687553.1687592},
  abstract =	 {We present DBToaster, a novel query compilation
                  framework for producing high performance compiled
                  query executors that incrementally and continuously
                  answer standing aggregate queries using in-memory
                  views. DBToaster targets applications that require
                  efficient main-memory processing of standing queries
                  (views) fed by high-volume data streams, recursively
                  compiling view maintenance (VM) queries into simple
                  C++ functions for evaluating database updates
                  (deltas). While today's VM algorithms consider the
                  impact of single deltas on view queries to produce
                  maintenance queries, we recursively consider deltas
                  of maintenance queries and compile to thoroughly
                  transform queries into code.  Recursive compilation
                  successively elides certain scans and joins, and
                  eliminates significant query plan interpreter
                  overheads.In this demonstration, we walk through our
                  compilation algorithm, and show the significant
                  performance advantages of our compiled executors
                  over other query processors. We are able to
                  demonstrate 1--3 orders of magnitude improvements in
                  processing times for a financial application and a
                  data warehouse loading application, both implemented
                  across a wide range of database systems, including
                  PostgreSQL, HSQLDB, a commercial DBMS 'A', the
                  Stanford STREAM engine, and a commercial stream
                  processor 'B'.},
  journal =	 {Proc. VLDB Endow.},
  month =	 aug,
  pages =	 {1566--1569},
  numpages =	 4
}

@article{chothia-vldb16,
  author =	 {Chothia, Zaheer and Liagouris, John and McSherry,
                  Frank and Roscoe, Timothy},
  title =	 {Explaining Outputs in Modern Data Analytics},
  year =	 2016,
  volume =	 9,
  number =	 12,
  issn =	 {2150-8097},
  doi =		 {10.14778/2994509.2994530},
  abstract =	 {We report on the design and implementation of a
                  general framework for interactively explaining the
                  outputs of modern data-parallel computations,
                  including iterative data analytics. To produce
                  explanations, existing works adopt a naive backward
                  tracing approach which runs into known issues; naive
                  backward tracing may identify: (i) too much
                  information that is difficult to process, and (ii)
                  not enough information to reproduce the output,
                  which hinders the logical debugging of the
                  program. The contribution of this work is
                  twofold. First, we provide methods to effectively
                  reduce the size of explanations based on the first
                  occurrence of a record in an iterative computation.
                  Second, we provide a general method for identifying
                  explanations that are sufficient to reproduce the
                  target output in arbitrary computations -- a problem
                  for which no viable solution existed until now. We
                  implement our approach on differential dataflow, a
                  modern high-throughput, low-latency dataflow
                  platform. We add a small (but extensible) set of
                  rules to explain each of its data-parallel
                  operators, and we implement these rules as
                  differential dataflow operators themselves. This
                  choice allows our implementation to inherit the
                  performance characteristics of differential
                  dataflow, and results in a system that efficiently
                  computes and updates explanatory inputs even as the
                  inputs of the reference computation change. We
                  evaluate our system with various analytic tasks on
                  real datasets, and we show that it produces concise
                  explanations in tens of milliseconds, while
                  remaining faster -- up to two orders of magnitude --
                  than even the best implementations that do not
                  support explanations.},
  journal =	 {Proc. VLDB Endow.},
  month =	 aug,
  pages =	 {1137--1148},
  numpages =	 12,
  url2 =	 {http://www.vldb.org/pvldb/vol9/p1137-chothia.pdf},
  comments =     {Backwards tracing: keep state at each operator to
                  enable reconstruction of lineage.  State
                  proportional to data processed.  Do not provide all
                  explanation, but only the first one computed.  How
                  to trace backwards?  Easy for one-to-one or
                  many-to-one operators or monotone operators.  They
                  can handle any graph, including cycles.  Nice
                  explanation of basic provenance concepts: "why
                  provenance" (multi-set of input tuple combinations),
                  "how provenance" (ring operation on input tuples),
                  "where provenance" (defined at attribute level, not
                  just tuple level), "transformation provenance"
                  (includes query operators, not just input tuples).
                  In DD, represent collections by *traces of changes*.
                  Explanations are produced by running a different
                  computation on these changes in a backwards way.
                  The computation graph itself is used to compute the
                  explanations.  For each collection col compute a
                  col_q collection with records that need an
                  explanation.  The DD operators compute on tuples
                  (not on Z-sets), and maintain the provenance
                  information for each tuple; this also makes it easy
                  to invert them!  Projections must maintain state to
                  be invertible.  Track only input records with
                  timestamps less than the output timestamp.  This
                  filters data significantly (the second derivation
                  will not be shown).}
}

@article{mcsherry-vldb20,
  author =	 {McSherry, Frank and Lattuada, Andrea and
                  Schwarzkopf, Malte and Roscoe, Timothy},
  title =	 {Shared Arrangements: Practical Inter-Query Sharing
                  for Streaming Dataflows},
  year =	 2020,
  volume =	 13,
  number =	 10,
  issn =	 {2150-8097},
  doi =		 {10.14778/3401960.3401974},
  abstract =	 {Current systems for data-parallel, incremental
                  processing and view maintenance over high-rate
                  streams isolate the execution of independent
                  queries. This creates unwanted redundancy and
                  overhead in the presence of concurrent incrementally
                  maintained queries: each query must independently
                  maintain the same indexed state over the same input
                  streams, and new queries must build this state from
                  scratch before they can begin to emit their first
                  results.This paper introduces shared arrangements:
                  indexed views of maintained state that allow
                  concurrent queries to reuse the same in-memory state
                  without compromising data-parallel performance and
                  scaling. We implement shared arrangements in a
                  modern stream processor and show order-of-magnitude
                  improvements in query response time and resource
                  consumption for incremental, interactive queries
                  against high-throughput streams, while also
                  significantly improving performance in other domains
                  including business analytics, graph processing, and
                  program analysis.},
  journal =	 {Proc. VLDB Endow.},
  month =	 jun,
  pages =	 {1793--1806},
  numpages =	 14,
  url2 =	 {http://www.vldb.org/pvldb/vol13/p1793-mcsherry.pdf}
}

@misc{ibm-systems,
  author       = {{IBM Corp.}},
  title        = {{IBM} {SystemS}},
  howpublished = {\url{https://researcher.watson.ibm.com/researcher/view_group_subpage.php?id=2534}},
  note         = {Retrieved September 2021},
  abstract     = {System S provides a programming model and an
                  execution platform for user-developed applications
                  that ingest, filter, analyze, and correlate
                  potentially massive volumes of continuous data
                  streams. It supports the composition of new
                  applications in the form of stream processing graphs
                  that can be created on the fly, mapped to a variety
                  hardware configurations, and adapted as requests
                  come and go. System S is designed to scale from
                  systems that acquire, analyze, interpret, and
                  organize continuous streams on a single processing
                  node, to high performance clusters of hundreds of
                  processing nodes. System S was designed to address
                  the following data management platform objectives:
                  Parallel and high performance stream processing
                  software platform capable of scaling over a range of
                  hardware capability Agile and automated
                  reconfiguration in response to changing user
                  objectives, available data, and the intrinsic
                  variability of system resource availability
                  Incremental tasking in the face of rapidly changing
                  data forms and types Multi-user, secure, and
                  auditable execution environment }
}

@inproceedings{bosboom-oopsla14,
  author =	 {Bosboom, Jeffrey and Rajadurai, Sumanaruban and
                  Wong, Weng-Fai and Amarasinghe, Saman},
  title =	 {{StreamJIT}: A Commensal Compiler for
                  High-Performance Stream Programming},
  year =	 2014,
  isbn =	 9781450325851,
  doi =		 {10.1145/2660193.2660236},
  abstract =	 {There are many domain libraries, but despite the
                  performance benefits of compilation, domain-specific
                  languages are comparatively rare due to the high
                  cost of implementing an optimizing compiler. We
                  propose commensal compilation, a new strategy for
                  compiling embedded domain-specific languages by
                  reusing the massive investment in modern language
                  virtual machine platforms. Commensal compilers use
                  the host language's front-end, use host platform
                  APIs that enable back-end optimizations by the host
                  platform JIT, and use an autotuner for optimization
                  selection. The cost of implementing a commensal
                  compiler is only the cost of implementing the
                  domain-specific optimizations. We demonstrate the
                  concept by implementing a commensal compiler for the
                  stream programming language StreamJIT atop the Java
                  platform. Our compiler achieves performance 2.8
                  times better than the StreamIt native code (via GCC)
                  compiler with considerably less implementation
                  effort.},
  booktitle =	 OOPSLA,
  pages =	 {177--195},
  numpages =	 19,
  keywords =	 {domain-specific languages, embedded domain-specific
                  languages},
  location =	 {Portland, Oregon, USA}
}

@misc{telegraphcq,
  author       = {{Telegraph Team, UC Berkeley}},
  title        = {{TelegraphCQ} v2.1},
  howpublished = {\url{http://telegraph.cs.berkeley.edu/telegraphcq/v2.1}},
  note         = {Retrieved September 2021},
  abstract     = {TelegraphCQ is a system for processing long-running
                  continuous queries over data streams. It is
                  implemented as a significant modification to the
                  PostgreSQL open-source database
                  system. Specifically, TelegraphCQ v2.1 is based on
                  the PostgreSQL 7.3.2 code-base.}
}

@inproceedings{Chandrasekar-sigmod03,
  author =	 {Chandrasekaran, Sirish and Cooper, Owen and
                  Deshpande, Amol and Franklin, Michael J. and
                  Hellerstein, Joseph M. and Hong, Wei and
                  Krishnamurthy, Sailesh and Madden, Samuel R. and
                  Reiss, Fred and Shah, Mehul A.},
  title =	 {{TelegraphCQ}: Continuous Dataflow Processing},
  year =	 2003,
  publisher =	 {Association for Computing Machinery},
  address =	 {New York, NY, USA},
  doi =		 {10.1145/872757.872857},
  url =		 {http://db.cs.berkeley.edu/papers/cidr03-tcq.pdf},
  booktitle =	 SIGMOD,
  pages =	 668,
  numpages =	 1,
  location =	 {San Diego, California},
  abstract =	 {Increasingly pervasive networks are leading towards
                  a world where data is constantly in motion. In such
                  a world, conventional techniques for query
                  processing, which were developed under the
                  assumption of a far more static and predictable
                  computational environment, will not be
                  sufficient. Instead, query processors based on
                  adaptive dataflow will be necessary. The Telegraph
                  project has developed a suite of novel technologies
                  for continuously adaptive query processing. The next
                  generation Telegraph system, called TelegraphCQ, is
                  focused on meeting the challenges that arise in
                  handling large streams of continuous queries over
                  high-volume, highly-variable data streams. In this
                  paper, we describe the system architecture and its
                  underlying technology, and report on our ongoing
                  implementation effort, which leverages the
                  PostgreSQL open source code base. We also discuss
                  open issues and our research agenda.}
}

@misc{aurora,
  title =	 {The Aurora Project},
  howpublished = {\url{http://cs.brown.edu/research/aurora/}},
  month =	 dec,
  year =	 2004,
  abstract =	 {Applications that deal with continuous data streams
                  are becoming increasingly important primarily due to
                  the emergence of sensors and similar small-scale
                  embedded computing devices that continuously produce
                  large volumes of data they obtain from their
                  environment. Many stream-based applications, such as
                  environmental monitoring, surveillance, tracking,
                  plant maintenance, and telecommunications data
                  management, require the ability to handle huge
                  volumes of continuous data streams arriving in
                  real-time; to be able to function efficiently under
                  uncertainty in data, and to be able to deliver
                  results in a timely fashion. Existing database
                  management systems are inherently ill-equipped for
                  supporting such tasks, mainly because they are
                  designed based on the implicit assumption that the
                  system is a passive data repository storing a large
                  but finite collection of data elements, which are
                  processed in response to human-initiated
                  queries. Moreover, in many of these applications,
                  there is a need to ask questions that require
                  comparing and combining stored, historical data with
                  real-time streaming data. The primary goal of the
                  Aurora project is to build a single infrastructure
                  that can efficiently and seamlessly meet the
                  requirements of such demanding applications. To this
                  end, we are currently critically rethinking many
                  existing data management and processing issues, as
                  well as developing new proactive data processing
                  concepts and techniques.  Aurora addresses three
                  broad application types in a single, unique
                  framework: Real-time monitoring applications
                  continuously monitor the present state of the world
                  and are, thus, interested in the most current data
                  as it arrives from the environment. In these
                  applications, there is little or no need (or time)
                  to store such data.  Archival applications are
                  typically interested in the past. They are primarily
                  concerned with processing large amounts of finite
                  data stored in a time-series repository.  Spanning
                  applications involve both the present and past
                  states of the world, requiring combining and
                  comparing incoming live data and stored historical
                  data. These applications are the most demanding as
                  there is a need to balance real-time requirements
                  with efficient processing of large amounts of
                  disk-resident data.  }
}

@inproceedings{carney-vldb02,
  author =	 {Carney, Don and \c{C}etintemel, U\c{g}ur and
                  Cherniack, Mitch and Convey, Christian and Lee,
                  Sangdon and Seidman, Greg and Stonebraker, Michael
                  and Tatbul, Nesime and Zdonik, Stan},
  title =	 {Monitoring Streams: A New Class of Data Management
                  Applications},
  year =	 2002,
  publisher =	 {VLDB Endowment},
  abstract =	 {This paper introduces monitoring applications, which
                  we will show differ substantially from conventional
                  business data processing. The fact that a software
                  system must process and react to continual inputs
                  from many sources (e.g., sensors) rather than from
                  human operators requires one to rethink the
                  fundamental architecture of a DBMS for this
                  application area. In this paper, we present Aurora,
                  a new DBMS that is currently under construction at
                  Brandeis University, Brown University, and M.I.T. We
                  describe the basic system architecture, a
                  stream-oriented set of operators, optimization
                  tactics, and support for real-time operation.},
  booktitle =	 VLDB,
  pages =	 {215--226},
  numpages =	 12,
  location =	 {Hong Kong, China}
}

@misc{borealis,
  title        = {{Borealis} Distributed Stream Processing Engine},
  howpublished = {\url{http://cs.brown.edu/research/borealis/public/}},
  year         = 2008
}

@article{balazinska-tds08,
  author =	 {Balazinska, Magdalena and Balakrishnan, Hari and
                  Madden, Samuel R. and Stonebraker, Michael},
  title =	 {Fault-Tolerance in the {Borealis} Distributed Stream
                  Processing System},
  year =	 2008,
  issue_date =	 {March 2008},
  volume =	 33,
  number =	 1,
  issn =	 {0362-5915},
  doi =		 {10.1145/1331904.1331907},
  abstract =	 {Over the past few years, Stream Processing Engines
                  (SPEs) have emerged as a new class of software
                  systems, enabling low latency processing of streams
                  of data arriving at high rates. As SPEs mature and
                  get used in monitoring applications that must
                  continuously run (e.g., in network security
                  monitoring), a significant challenge arises: SPEs
                  must be able to handle various software and hardware
                  faults that occur, masking them to provide high
                  availability (HA). In this article, we develop,
                  implement, and evaluate DPC (Delay, Process, and
                  Correct), a protocol to handle crash failures of
                  processing nodes and network failures in a
                  distributed SPE.Like previous approaches to HA, DPC
                  uses replication and masks many types of node and
                  network failures. In the presence of network
                  partitions, the designer of any replication system
                  faces a choice between providing availability or
                  data consistency across the replicas. In DPC, this
                  choice is made explicit: the user specifies an
                  availability bound (no result should be delayed by
                  more than a specified delay threshold even under
                  failure if the corresponding input is available),
                  and DPC attempts to minimize the resulting
                  inconsistency between replicas (not all of which
                  might have seen the input data) while meeting the
                  given delay threshold.  Although conceptually
                  simple, the DPC protocol tolerates the occurrence of
                  multiple simultaneous failures as well as any
                  further failures that occur during recovery.This
                  article describes DPC and its implementation in the
                  Borealis SPE. We show that DPC enables a distributed
                  SPE to maintain low-latency processing at all times,
                  while also achieving eventual consistency, where
                  applications eventually receive the complete and
                  correct output streams. Furthermore, we show that,
                  independent of system size and failure location, it
                  is possible to handle failures almost up-to the
                  user-specified bound in a manner that meets the
                  required availability without introducing any
                  inconsistency.},
  journal =	 {ACM Trans. Database Syst.},
  month =	 mar,
  articleno =	 3,
  numpages =	 44,
  keywords =	 {fault-tolerance, consistency, availability,
                  Distributed stream processing}
}

@misc{stream,
  title        = {Stanford Stream Data Manager},
  howpublished = {\url{http://infolab.stanford.edu/stream/}},
  year         = 2006,
  abstract     = {In applications such as network monitoring,
                  telecommunications data management, clickstream
                  monitoring, manufacturing, sensor networks, and
                  others, data takes the form of continuous data
                  streams rather than finite stored data sets, and
                  clients require long-running continuous queries as
                  opposed to one-time queries. Traditional database
                  systems and data processing algorithms are
                  ill-equipped to handle complex and numerous
                  continuous queries over data streams, and many
                  aspects of data management and processing need to be
                  reconsidered in their presence. In the STREAM
                  project, we are reinvestigating data management and
                  query processing in the presence of multiple,
                  continuous, rapid, time-varying data streams. We are
                  attacking problems ranging from basic theory results
                  to algorithms to implementing a comprehensive
                  prototype data stream management system.}
}

@techreport{arasu-tr02,
  number =	 {2002-57},
  author =	 {Arvind Arasu and Shivnath Babu and Jennifer Widom},
  title =	 {An Abstract Semantics and Concrete Language for
                  Continuous Queries over Streams and Relations},
  type =	 {Technical Report},
  institution =	 {Stanford InfoLab},
  year =	 2002,
  keywords =	 {Data Streams, Query Language, Semantics, Continuous
                  Queries},
  url =		 {http://ilpubs.stanford.edu:8090/563/},
  abstract =	 {Despite the recent surge of research in query
                  processing over data streams, little attention has
                  been devoted to defining precise semantics for
                  continuous queries over streams.  We first present
                  an abstract semantics based on several building
                  blocks: formal definitions for streams and
                  relations, mappings among them, and any relational
                  query language.  From these basics we define a
                  precise interpretation for continuous queries over
                  streams and relations. We then propose a concrete
                  language, CQL (for Continuous Query Language), which
                  instantiates the abstract semantics using SQL as the
                  relational query language and window specifications
                  derived from SQL-99 to map from streams to
                  relations.  We identify some equivalences that can
                  be used to rewrite CQL queries for optimization, and
                  we discuss some additional implementation issues
                  arising from the language and its semantics.  We
                  have implemented a substantial fraction of CQL in a
                  Data Stream Management System at Stanford, and we
                  have developed a public repository of data stream
                  applications that includes a wide variety of queries
                  expressed in CQL.}
}

@misc{activemq,
  title =	 {{ActiveMQ}},
  howpublished = {\url{https://activemq.apache.org/}},
  note =	 {Retrieved September 2021},
  abstract =	 {Apache ActiveMQ® is the most popular open source,
                  multi-protocol, Java-based message broker. It
                  supports industry standard protocols so users get
                  the benefits of client choices across a broad range
                  of languages and platforms. Connect from clients
                  written in JavaScript, C, C++, Python, .Net, and
                  more. Integrate your multi-platform applications
                  using the ubiquitous AMQP protocol. Exchange
                  messages between your web applications using STOMP
                  over websockets. Manage your IoT devices using
                  MQTT. Support your existing JMS infrastructure and
                  beyond. ActiveMQ offers the power and flexibility to
                  support any messaging use-case.}
}

@misc{kafka,
  title =	 {Apache {Kafka}},
  howpublished = {\url{https://kafka.apache.org/}},
  note =	 {Retrieved September 2021},
  abstract =	 {Apache Kafka is an open-source distributed event
                  streaming platform used by thousands of companies
                  for high-performance data pipelines, streaming
                  analytics, data integration, and mission-critical
                  applications.}
}

@inproceedings{wang-sigmod21,
  author =	 {Wang, Guozhang and Chen, Lei and Dikshit, Ayusman
                  and Gustafson, Jason and Chen, Boyang and Sax,
                  Matthias J. and Roesler, John and Blee-Goldman,
                  Sophie and Cadonna, Bruno and Mehta, Apurva and
                  Madan, Varun and Rao, Jun},
  title =	 {Consistency and Completeness: Rethinking Distributed
                  Stream Processing in Apache Kafka},
  year =	 2021,
  url =		 {https://doi.org/10.1145/3448016.3457556},
  abstract =	 {An increasingly important system requirement for
                  distributed stream processing applications is to
                  provide strong correctness guarantees under
                  unexpected failures and out-of-order data so that
                  its results can be authoritative (not needing
                  complementary batch results).  Although existing
                  systems have put a lot of effort into addressing
                  some specific issues, such as consistency and
                  completeness, how to enable users to make flexible
                  and transparent trade-off decisions among
                  correctness, performance, and cost still remains a
                  practical challenge. Specifically, similar
                  mechanisms are usually applied to tackle both
                  consistency and completeness, which can result in
                  unnecessary performance penalties. We present Apache
                  Kafka's core design for stream processing, which
                  relies on its persistent log architecture as the
                  storage and inter-processor communication layers to
                  achieve correctness guarantees. Kafka Streams, a
                  scalable stream processing client library in Apache
                  Kafka, defines the processing logic as
                  read-process-write cycles in which all processing
                  state updates and result outputs are captured as log
                  appends. Idempotent and transactional write
                  protocols are utilized to guarantee exactly-once
                  semantics. Furthermore, revision-based speculative
                  processing is employed to emit results as soon as
                  possible while handling out-of-order data. We also
                  demonstrate how Kafka Streams behaves in practice
                  with large-scale deployments and performance
                  insights exhibiting its flexible and low-overhead
                  trade-offs.},
  booktitle =	 SIGMOD,
  pages =	 {2602--2613},
  numpages =	 12,
  keywords =	 {stream processing, semantics},
  location =	 {Virtual Event, China},
  series =	 {SIGMOD/PODS '21}
}

@misc{storm,
  title =	 {{Apache} {Storm}},
  howpublished = {\url{https://storm.apache.org/}},
  note =	 {Retrieved September 2021},
  abstract =	 {Apache Storm is a free and open source distributed
                  realtime computation system. Apache Storm makes it
                  easy to reliably process unbounded streams of data,
                  doing for realtime processing what Hadoop did for
                  batch processing. Apache Storm is simple, can be
                  used with any programming language, and is a lot of
                  fun to use!  Apache Storm has many use cases:
                  realtime analytics, online machine learning,
                  continuous computation, distributed RPC, ETL, and
                  more. Apache Storm is fast: a benchmark clocked it
                  at over a million tuples processed per second per
                  node. It is scalable, fault-tolerant, guarantees
                  your data will be processed, and is easy to set up
                  and operate.  Apache Storm integrates with the
                  queueing and database technologies you already
                  use. An Apache Storm topology consumes streams of
                  data and processes those streams in arbitrarily
                  complex ways, repartitioning the streams between
                  each stage of the computation however needed. Read
                  more in the tutorial.}
}

@article{carbone-ieee15,
  title =	 {Apache {Flink}: Stream and batch processing in a
                  single engine},
  author =	 {Carbone, Paris and Katsifodimos, Asterios and Ewen,
                  Stephan and Markl, Volker and Haridi, Seif and
                  Tzoumas, Kostas},
  journal =	 {Bulletin of the IEEE Computer Society Technical
                  Committee on Data Engineering},
  volume =	 36,
  number =	 4,
  year =	 2015,
  publisher =	 {IEEE Computer Society},
  abstract =	 {Apache Flink is an open-source system for processing
                  streaming and batch data. Flink is built on the
                  philosophy that many classes of data processing
                  applications, including real-time analytics,
                  continuous data pipelines, historic data
                  processing (batch), and iterative algorithms
                  (machine learning, graph analysis) can be expressed
                  and executed as pipelined fault-tolerant
                  dataflows. In this paper, we present Flink's
                  architecture and expand on how a (seemingly diverse)
                  set of use cases can be unified under a single
                  execution model.},
  url =          {https://www.diva-portal.org/smash/get/diva2:1059537/FULLTEXT01.pdf}
}

@inproceedings{zaharia-sosp13,
  author =	 {Zaharia, Matei and Das, Tathagata and Li, Haoyuan
                  and Hunter, Timothy and Shenker, Scott and Stoica,
                  Ion},
  title =	 {Discretized Streams: Fault-Tolerant Streaming
                  Computation at Scale},
  year =	 2013,
  url =		 {https://doi.org/10.1145/2517349.2522737},
  abstract =	 {Many "big data" applications must act on data in
                  real time. Running these applications at ever-larger
                  scales requires parallel platforms that
                  automatically handle faults and
                  stragglers. Unfortunately, current distributed
                  stream processing models provide fault recovery in
                  an expensive manner, requiring hot replication or
                  long recovery times, and do not handle
                  stragglers. We propose a new processing model,
                  discretized streams (D-Streams), that overcomes
                  these challenges. D-Streams enable a parallel
                  recovery mechanism that improves efficiency over
                  traditional replication and backup schemes, and
                  tolerates stragglers. We show that they support a
                  rich set of operators while attaining high per-node
                  throughput similar to single-node systems, linear
                  scaling to 100 nodes, sub-second latency, and
                  sub-second fault recovery. Finally, D-Streams can
                  easily be composed with batch and interactive query
                  models like MapReduce, enabling rich applications
                  that combine these modes. We implement D-Streams in
                  a system called Spark Streaming.},
  booktitle =	 SOSP,
  pages =	 {423--438},
  numpages =	 16,
  location =	 {Farminton, Pennsylvania},
  series =	 {SOSP '13}
}

@inproceedings{kahn-ifip74,
  title =	 {The semantics of a simple language for parallel
                  programming},
  year =	 1974,
  author =	 {Gilles Kahn},
  booktitle =	 {{IFIP} Congress on Information Processing},
  url =          {http://www1.cs.columbia.edu/~sedwards/papers/kahn1974semantics.pdf},
  abstract =	 {In this paper we describe a simple language for
                  parallel programming.  Its semantics is studied
                  thoroughly.  The desirable property of this language
                  and its deficiencies are exhibited by this
                  theoretical study. Basic results on parallel program
                  schemata are given. We hope in this way to make a
                  case for a more formal (i.e. mathematical) approach
                  to the design of languages for systems programming
                  and the design of operating systems.}
}

@article{lee-ieee95,
  author =	 {Lee, Edward A. and Parks, Thomas M.},
  title =	 {Dataflow Process Networks},
  year =	 1995,
  abstract =	 {We review a model of computation used in industrial
                  practice in signal processing software environments
                  and experimentally in other contexts. We give this
                  model the name "dataflow process networks," and
                  study its formal properties as well as its utility
                  as a basis for programming language design. Variants
                  of this model are used in commercial visual
                  programming systems such as SPW from the Alta Group
                  of Cadence (formerly Comdisco Systems), COSSAP from
                  Synopsys (formerly Cadis), the DSP station from
                  Mentor Graphics, and Hypersignal from
                  Hyperception. They are also used in research
                  software such as Khoros from the University of New
                  Mexico and Ptolemy from the University of California
                  at Berkeley, among many others. Dataflow process
                  networks are shown to be a special case of Kahn
                  process networks, a model of computation where a
                  number of concurrent processes communicate through
                  unidirectional FIFO Channels, where writes to the
                  channel are nonblocking, and reads are blocking. In
                  dataflow process networks, each process consists of
                  repeated "firings" of a dataflow "actor". An actor
                  defines a (often functional) quantum of
                  computation. By dividing processes into actor
                  firings, the considerable overhead of context
                  switching incurred in most implementations of Kahn
                  process networks is avoided. We relate dataflow
                  networks to other dataflow models, including those
                  used in dataflow machines, such as static dataflow
                  and the tagged-token model. We also relate dataflow
                  process networks to functional languages such as
                  Haskell, and show that modern language concepts such
                  as higher-order functions and polymorphism can be
                  used effectively in dataflow process networks. A
                  number of programming examples using a visual syntax
                  are given.},
  journal =	 {Proceedings of the IEEE},
  pages =	 {773--801},
  month =	 may,
  volume =	 83,
  number =	 5,
  numpages =	 27,
  url =     {https://ptolemy.berkeley.edu/publications/papers/95/processNets/}
}

@inproceedings{zhao-ppdp21,
  author =	 {Zhao, David and Subotic, Pavle and Raghothaman,
                  Mukund and Scholz, Bernhard},
  title =	 {Towards Elastic Incrementalization for {Datalog}},
  year =	 2021,
  url =		 {https://doi.org/10.1145/3479394.3479415},
  abstract =	 {Various incremental evaluation strategies for
                  Datalog have been developed that reuse computations
                  for small input changes. These methods assume that
                  incrementalization is always a better strategy than
                  recomputation. However, in real-world applications
                  such as static program analysis, recomputation can
                  be cheaper than incrementalization for large
                  updates. This work introduces an elastic incremental
                  approach with two strategies that can be selected
                  based on the impact of the input change. The first
                  strategy is a Bootstrap strategy that recomputes the
                  entire result for high-impact changes. The second is
                  an Update strategy that performs an incremental
                  update for low-impact changes.  Our approach allows
                  for a lightweight Bootstrap strategy suitable for
                  high-impact changes, with the trade-off that Update
                  may require more work for small changes. We
                  demonstrate our approach using real-world
                  applications and compare our elastic incremental
                  approach to existing methods.},
  booktitle =	 PPDP,
  articleno =	 20,
  numpages =	 16,
  keywords =	 {Datalog compilers, provenance, incremental
                  evaluation, Datalog},
  location =	 {Tallinn, Estonia}
}

@article{chen-sigmod00,
  author =	 {Chen, Jianjun and DeWitt, David J. and Tian, Feng
                  and Wang, Yuan},
  title =	 {{NiagaraCQ}: A Scalable Continuous Query System for
                  Internet Databases},
  year =	 2000,
  volume =	 29,
  number =	 2,
  url =		 {https://doi.org/10.1145/335191.335432},
  abstract =	 {Continuous queries are persistent queries that allow
                  users to receive new results when they become
                  available. While continuous query systems can
                  transform a passive web into an active environment,
                  they need to be able to support millions of queries
                  due to the scale of the Internet. No existing
                  systems have achieved this level of
                  scalability. NiagaraCQ addresses this problem by
                  grouping continuous queries based on the observation
                  that many web queries share similar
                  structures. Grouped queries can share the common
                  computation, tend to fit in memory and can reduce
                  the I/O cost significantly. Furthermore, grouping on
                  selection predicates can eliminate a large number of
                  unnecessary query invocations. Our grouping
                  technique is distinguished from previous group
                  optimization approaches in the following
                  ways. First, we use an incremental group
                  optimization strategy with dynamic re-grouping. New
                  queries are added to existing query groups, without
                  having to regroup already installed queries. Second,
                  we use a query-split scheme that requires minimal
                  changes to a general-purpose query engine.  Third,
                  NiagaraCQ groups both change-based and timer-based
                  queries in a uniform way.  To insure that NiagaraCQ
                  is scalable, we have also employed other techniques
                  including incremental evaluation of continuous
                  queries, use of both pull and push models for
                  detecting heterogeneous data source changes, and
                  memory caching. This paper presents the design of
                  NiagaraCQ system and gives some experimental results
                  on the system's performance and scalability.},
  journal =	 {SIGMOD Rec.},
  month =	 may,
  pages =	 {379--390},
  numpages =	 12
}

@inproceedings{shkapsky-sigmod16,
  author =	 {Shkapsky, Alexander and Yang, Mohan and Interlandi,
                  Matteo and Chiu, Hsuan and Condie, Tyson and
                  Zaniolo, Carlo},
  title =	 {Big Data Analytics with Datalog Queries on Spark},
  year =	 2016,
  url =		 {https://doi.org/10.1145/2882903.2915229},
  abstract =	 {There is great interest in exploiting the
                  opportunity provided by cloud computing platforms
                  for large-scale analytics. Among these platforms,
                  Apache Spark is growing in popularity for machine
                  learning and graph analytics. Developing efficient
                  complex analytics in Spark requires deep
                  understanding of both the algorithm at hand and the
                  Spark API or subsystem APIs (e.g., Spark SQL,
                  GraphX). Our BigDatalog system addresses the problem
                  by providing concise declarative specification of
                  complex queries amenable to efficient
                  evaluation. Towards this goal, we propose
                  compilation and optimization techniques that tackle
                  the important problem of efficiently supporting
                  recursion in Spark. We perform an experimental
                  comparison with other state-of-the-art large-scale
                  Datalog systems and verify the efficacy of our
                  techniques and effectiveness of Spark in supporting
                  Datalog-based analytics.},
  booktitle =	 SIGMOD,
  pages =	 {1135--1149},
  numpages =	 15,
  keywords =	 {monotonic aggregates, spark, recursive queries,
                  datalog},
  location =	 {San Francisco, California, USA}
}

@misc{dlv,
  title =	 {{DLV}},
  howpublished = {\url{http://www.dlvsystem.com/}},
  note =	 {Retrieved October 2021},
  abstract =	 {DLV is an artificial intelligence system based on
                  disjunctive logic programming, which offers
                  front-ends to several advanced KR formalisms.}
}

@inproceedings{wang-cidr17,
  author =	 {Jingjing Wang and Tobin Baker and Magdalena
                  Balazinska and Daniel Halperin and Brandon Haynes
                  and Bill Howe and Dylan Hutchison and Shrainik Jain
                  and Ryan Maas and Parmita Mehta and Dominik Moritz
                  and Brandon Myers and Jennifer Ortiz and Dan Suciu
                  and Andrew Whitaker and Shengliang Xu},
  title =	 {The {Myria} Big Data Management and Analytics System
                  and Cloud Services},
  booktitle =	 CIDR,
  address =	 {Chaminade, CA},
  month =	 jan,
  publisher =	 {www.cidrdb.org},
  year =	 2017,
  url =          {http://cidrdb.org/cidr2017/papers/p37-wang-cidr17.pdf},
  abstract =	 {In this paper, we present an overview of the Myria
                  stack for big data management and analytics that we
                  developed in the database group at the University of
                  Washington and that we have been operating as a
                  cloud service aimed at domain scientists around the
                  UW campus. We highlight Myria's key design choices
                  and innovations and report on our experience with
                  using Myria for various data science use-cases.}
}

@article{seo-tkde15,
  title =	 {{SociaLite}: An Efficient Graph Query Language Based
                  on {Datalog}},
  abstract =	 {With the rise of social networks, large-scale graph
                  analysis becomes increasingly important. Because SQL
                  lacks the expressiveness and performance needed for
                  graph algorithms, lower-level, general-purpose
                  languages are often used instead. For greater ease
                  of use and efficiency, we propose SociaLite, a
                  high-level graph query language based on Datalog. As
                  a logic programming language, Datalog allows many
                  graph algorithms to be expressed
                  succinctly. However, its performance has not been
                  competitive when compared to low-level
                  languages. With SociaLite, users can provide
                  high-level hints on the data layout and evaluation
                  order; they can also define recursive aggregate
                  functions which, as long as they are meet
                  operations, can be evaluated incrementally and
                  efficiently. Moreover, recursive aggregate functions
                  make it possible to implement more graph algorithms
                  that cannot be implemented in Datalog. We evaluated
                  SociaLite by running nine graph algorithms in total;
                  eight for social network analysis (shortest paths,
                  PageRank, hubs and authorities, mutual neighbors,
                  connected components, triangles, clustering
                  coefficients, and betweenness centrality) and one
                  for biological network analysis (Eulerian
                  cycles). We use two real-life social graphs,
                  LiveJournal and Last.fm, for the evaluation as well
                  as one synthetic graph. The optimizations proposed
                  in this paper speed up almost all the algorithms by
                  3 to 22 times. SociaLite even outperforms typical
                  Java implementations by an average of 50 percent for
                  the graph algorithms tested. When compared to highly
                  optimized Java implementations, SociaLite programs
                  are an order of magnitude more succinct and easier
                  to write. Its performance is competitive, with only
                  16 percent overhead for the largest benchmark, and
                  25 percent overhead for the worst case
                  benchmark. Most importantly, being a query language,
                  SociaLite enables many more users who are not
                  proficient in software engineering to perform
                  network analysis easily and efficiently.},
  keywords =	 {Datalog, Graph Algorithms, Query Languages},
  author =	 {Seo, Jiwon and Guo, Stephen and Lam, Monica S.},
  year =	 2015,
  month =	 jul,
  day =		 1,
  doi =		 {10.1109/TKDE.2015.2405562},
  volume =	 27,
  pages =	 {1824--1837},
  journal =	 {IEEE Transactions on Knowledge and Data Engineering},
  issn =	 {1041-4347},
  number =	 7
}

@article{fan-vldb19,
  author =	 {Fan, Zhiwei and Zhu, Jianqiao and Zhang, Zuyu and
                  Albarghouthi, Aws and Koutris, Paraschos and Patel,
                  Jignesh M.},
  title =	 {Scaling-up in-Memory {Datalog} Processing:
                  Observations and Techniques},
  year =	 2019,
  issue_date =	 {February 2019},
  publisher =	 {VLDB Endowment},
  volume =	 12,
  number =	 6,
  issn =	 {2150-8097},
  url =		 {https://doi.org/10.14778/3311880.3311886},
  abstract =	 {Recursive query processing has experienced a recent
                  resurgence, as a result of its use in many modern
                  application domains, including data integration,
                  graph analytics, security, program analysis,
                  networking and decision making. Due to the large
                  volumes of data being processed, several research
                  efforts across multiple communities have explored
                  how to scale up recursive queries, typically
                  expressed in Datalog. Our experience with these
                  tools indicate that their performance does not
                  translate across domains---e.g., a tool designed for
                  large-scale graph analytics does not exhibit the
                  same performance on program-analysis tasks, and vice
                  versa. Starting from the above observation, we make
                  the following two contributions. First, we perform a
                  detailed experimental evaluation comparing a number
                  of state-of-the-art Datalog systems on a wide
                  spectrum of graph analytics and program-analysis
                  tasks, and summarize the pros and cons of existing
                  techniques. Second, we design and implement our own
                  general-purpose Datalog engine, called RecStep, on
                  top of a parallel single-node relational system. We
                  outline the techniques we applied on RecStep, as
                  well as the contribution of each technique to the
                  overall performance. Using RecStep as a baseline, we
                  demonstrate that it generally out-performs
                  state-of-the-art parallel Datalog engines on complex
                  and large-scale Datalog evaluation, by a 4-6X
                  margin. An additional insight from our work is that
                  it is possible to build a high-performance Datalog
                  system on top of a relational engine, an idea that
                  has been dismissed in past work.},
  journal =	 {Proc. VLDB Endow.},
  month =	 feb,
  pages =	 {695--708},
  numpages =	 14
}

@inproceedings{gonzalez-osdi14,
  author =	 {Gonzalez, Joseph E. and Xin, Reynold S. and Dave,
                  Ankur and Crankshaw, Daniel and Franklin, Michael
                  J. and Stoica, Ion},
  title =	 {{GraphX}: Graph Processing in a Distributed Dataflow
                  Framework},
  year =	 2014,
  abstract =	 {In pursuit of graph processing performance, the
                  systems community has largely abandoned
                  general-purpose distributed dataflow frameworks in
                  favor of specialized graph processing systems that
                  provide tailored programming abstractions and
                  accelerate the execution of iterative graph
                  algorithms. In this paper we argue that many of the
                  advantages of specialized graph processing systems
                  can be recovered in a modern general-purpose
                  distributed dataflow system. We introduce GraphX, an
                  embedded graph processing framework built on top of
                  Apache Spark, a widely used distributed dataflow
                  system. GraphX presents a familiar composable graph
                  abstraction that is sufficient to express existing
                  graph APIs, yet can be implemented using only a few
                  basic dataflow operators (e.g., join, map,
                  group-by). To achieve performance parity with
                  specialized graph systems, GraphX recasts
                  graph-specific optimizations as distributed join
                  optimizations and materialized view maintenance. By
                  leveraging advances in distributed dataflow
                  frameworks, GraphX brings low-cost fault tolerance
                  to graph processing. We evaluate GraphX on real
                  workloads and demonstrate that GraphX achieves an
                  order of magnitude performance gain over the base
                  dataflow framework and matches the performance of
                  specialized graph processing systems while enabling
                  a wider range of computation.},
  booktitle =	 OSDI,
  pages =	 {599--613},
  numpages =	 15,
  location =	 {Broomfield, CO}
}

@book{Abiteboul-book95,
  author =	 {Serge Abiteboul and Richard Hull and Victor Vianu},
  title =	 {Foundations of Databases},
  publisher =	 {Addison-Wesley},
  year =	 1995,
  url =		 {http://webdam.inria.fr/Alice/},
  isbn =	 {0-201-53771-0}
}

@article{greco-sldm15,
  author =	 {Greco, Sergio and Molinaro, Cristian},
  title =	 {Datalog and Logic Databases},
  journal =	 {Synthesis Lectures on Data Management},
  volume =	 7,
  number =	 2,
  pages =	 {1--169},
  year =	 2015,
  url =		 {https://doi.org/10.2200/S00648ED1V01Y201505DTM041},
  abstract =	 {The use of logic in databases started in the late
                  1960s. In the early 1970s Codd formalized databases
                  in terms of the relational calculus and the
                  relational algebra. A major influence on the use of
                  logic in databases was the development of the field
                  of logic programming. Logic provides a convenient
                  formalism for studying classical database problems
                  and has the important property of being declarative,
                  that is, it allows one to express what she wants
                  rather than how to get it.  For a long time,
                  relational calculus and algebra were considered the
                  relational database languages. However, there are
                  simple operations, such as computing the transitive
                  closure of a graph, which cannot be expressed with
                  these languages. Datalog is a declarative query
                  language for relational databases based on the logic
                  programming paradigm. One of the peculiarities that
                  distinguishes Datalog from query languages like
                  relational algebra and calculus is recursion, which
                  gives Datalog the capability to express queries like
                  computing a graph transitive closure.  Recent years
                  have witnessed a revival of interest in Datalog in a
                  variety of emerging application domains such as
                  data integration, information extraction,
                  networking, program analysis, security, cloud
                  computing, ontology reasoning, and many others. The
                  aim of this book is to present the basics of
                  Datalog, some of its extensions, and recent
                  applications to different domains.}
}

@InProceedings{picallo-scop19,
  author =	 "Alvarez-Picallo, Mario and Eyers-Taylor, Alex and
                  Peyton Jones, Michael and Ong, C.-H. Luke",
  title =	 "Fixing Incremental Computation",
  booktitle =	 "European Symposium on Programming Languages and
                  Systems (ESOP)",
  year =	 2019,
  pages =	 "525--552",
  abstract =	 "Incremental computation has recently been studied
                  using the concepts of change structures and
                  derivatives of programs, where the derivative of a
                  function allows updating the output of the function
                  based on a change to its input. We generalise change
                  structures to change actions, and study their
                  algebraic properties. We develop change actions for
                  common structures in computer science, including
                  directed-complete partial orders and Boolean
                  algebras. We then show how to compute derivatives of
                  fixpoints. This allows us to perform incremental
                  evaluation and maintenance of recursively defined
                  functions with particular application generalised
                  Datalog programs. Moreover, unlike previous results,
                  our techniques are modular in that they are easy to
                  apply both to variants of Datalog and to other
                  programming languages.",
  url =		 {https://link.springer.com/chapter/10.1007/978-3-030-17184-1_19}
}

@inproceedings{gupta-idb93,
  author =	 {Gupta, Ashish and Mumick, Inderpal Singh and
                  Subrahmanian, V. S.},
  title =	 {Maintaining Views Incrementally},
  year =	 1993,
  url =		 {https://doi.org/10.1145/170035.170066},
  abstract =	 {We present incremental evaluation algorithms to
                  compute changes to materialized views in relational
                  and deductive database systems, in response to
                  changes (insertions, deletions, and updates) to the
                  relations. The view definitions can be in SQL or
                  Datalog, and may use UNION, negation, aggregation
                  (e.g. SUM, MIN), linear recursion, and general
                   recursion. We first present a counting algorithm that
                  tracks the number of alternative derivations
                  (counts) for each derived tuple in a view. The
                  algorithm works with both set and duplicate
                  semantics. We present the algorithm for nonrecursive
                  views (with negation and aggregation), and show that
                  the count for a tuple can be computed at little or
                  no cost above the cost of deriving the tuple. The
                  algorithm is optimal in that it computes exactly
                  those view tuples that are inserted or deleted. Note
                  that we store only the number of derivations, not
                   the derivations themselves. We then present the
                  Delete and Rederive algorithm, DRed, for incremental
                  maintenance of recursive views (negation and
                  aggregation are permitted). The algorithm works by
                  first deleting a superset of the tuples that need to
                  be deleted, and then rederiving some of them. The
                  algorithm can also be used when the view definition
                  is itself altered.},
  booktitle =	 {Proceedings of the 1993 ACM SIGMOD International
                  Conference on Management of Data},
  pages =	 {157--166},
  numpages =	 10,
  address =	 {Washington, D.C., USA},
  series =	 {SIGMOD '93}
}


@article{gupta-idb95,
  title =	 {Maintenance of materialized views: Problems,
                  techniques, and applications},
  author =	 {Gupta, Ashish and Mumick, Inderpal Singh and others},
  journal =	 {IEEE Data Eng. Bull.},
  volume =	 18,
  number =	 2,
  pages =	 {3--18},
  year =	 1995,
  abstract =	 {In this paper we motivate and describe materialized
                  views, their applications, and the problems and
                  techniques for their maintenance.  We present a
                  taxonomy of view maintenance problems based upon the
                  class of views considered, upon the resources used
                  to maintain the view, upon the types of
                  modifications to the base data that are considered
                  during maintenance, and whether the technique works
                  for all instances of databases or modifications.  We
                  describe some view maintenance techniques proposed
                  in the literature in terms of our taxonomy.
                  Finally, we consider new and promising application
                  domains that are likely to drive work in
                  materialized views and view maintenance.}
}

@InProceedings{lee-ifip93,
  author =	 {Edward A. Lee},
  title =	 {Multidimensional Streams Rooted in Dataflow},
  booktitle =	 {{IFIP} Working Conference on Architectures and
                  Compilation Techniques for Fine and Medium Grain
                  Parallelism},
  year =	 1993,
  month =	 {January 20-22},
  address =	 {Orlando, FL},
  abstract =	 {A programming model rooted in dataflow principles
                  and supporting multidimensional streams is developed
                  and compared with streams in Lucid, Lustre, Signal,
                  Silage, Sisal, and Id. The model is based on
                  production and consumption of tokens and requires
                  neither the "clock" synchronization of Lustre and
                  Signal nor the Lisp-like list model of Sisal and
                  Id. Multidimensional streams are similar to those in
                  Lucid, but augmented with a producer/consumer model
                  derived from our earlier work on one-dimensional
                  dataflow models. Analytical properties of programs
                  are easily derived, and compile-time predictability
                  is maximized so that run-time overhead costs can be
                  minimized. The scheme is illustrated by building
                  scalable fine-grain programs for some simple
                  examples and showing how data and function
                  parallelism can be automatically exploited given
                  these descriptions.},
  url =          {https://ptolemy.berkeley.edu/publications/papers/93/mdsdf/}
}

@article{khamis-arxiv21,
  author =	 {Mahmoud Abo Khamis and Hung Q. Ngo and Reinhard
                  Pichler and Dan Suciu and Yisu Remy Wang},
  title =	 {Convergence of Datalog over (Pre-) Semirings},
  journal =	 {CoRR},
  volume =	 {abs/2105.14435},
  year =	 2021,
  url =		 {https://arxiv.org/abs/2105.14435},
  eprinttype =	 {arXiv},
  eprint =	 {2105.14435},
  timestamp =	 {Wed, 02 Jun 2021 11:46:42 +0200},
  biburl =       {https://dblp.org/rec/journals/corr/abs-2105-14435.bib},
  bibsource =	 {dblp computer science bibliography,
                  https://dblp.org},
  abstract =	 {Recursive queries have been traditionally studied in
                  the framework of Datalog, a language that restricts
                  recursion to monotone queries over sets, which is
                  guaranteed to converge in polynomial time in the
                  size of the input. But modern big data systems
                  require recursive computations beyond the Boolean
                  space. In this paper we study the convergence of
                  Datalog when it is interpreted over an arbitrary
                  semiring. We consider an ordered semiring, define
                  the semantics of a Datalog program as a least
                  fixpoint in this semiring, and study the number of
                  steps required to reach that fixpoint, if ever. We
                  identify algebraic properties of the semiring that
                  correspond to certain convergence properties of
                  Datalog programs. Finally, we describe a class of
                  ordered semirings on which one can use the
                  semi-naive evaluation algorithm on any Datalog
                  program. }
}

@inproceedings{szabo-pldi21,
  author =	 {Szab\'{o}, Tam\'{a}s and Erdweg, Sebastian and
                  Bergmann, G\'{a}bor},
  title =	 {Incremental Whole-Program Analysis in Datalog with
                  Lattices},
  year =	 2021,
  url =		 {https://doi.org/10.1145/3453483.3454026},
  abstract =	 {Incremental static analyses provide up-to-date
                  analysis results in time proportional to the size of
                  a code change, not the entire code base. This
                  promises fast feedback to programmers in IDEs and
                  when checking in commits. However, existing
                  incremental analysis frameworks fail to deliver on
                  this promise for whole-program lattice-based
                  data-flow analyses. In particular, prior
                  Datalog-based frameworks yield good incremental
                  performance only for intra-procedural analyses.  In
                  this paper, we first present a methodology to
                  empirically test if a computation is amenable to
                  incrementalization. Using this methodology, we find
                  that incremental whole-program analysis may be
                  possible. Second, we present a new incremental
                  Datalog solver called LADDDER to eliminate the
                  shortcomings of prior Datalog-based analysis
                  frameworks. Our Datalog solver uses a non-standard
                  aggregation semantics which allows us to loosen
                  monotonicity requirements on analyses and to improve
                  the performance of lattice aggregators
                  considerably. Our evaluation on real-world Java code
                  confirms that LADDDER provides up-to-date points-to,
                  constant propagation, and interval information in
                  milliseconds.},
  booktitle =	 PLDI,
  pages =	 {1--15},
  numpages =	 15,
  keywords =	 {Datalog, Incremental Computing, Static Analysis},
  address =	 {Virtual, Canada},
  series =	 {PLDI 2021}
}

@article{motik-ai19,
  author =	 {Boris Motik and Yavor Nenov and Robert Piro and Ian
                  Horrocks},
  title =	 {Maintenance of {Datalog} materialisations revisited},
  journal =	 {Artif. Intell.},
  volume =	 269,
  pages =	 {76--136},
  year =	 2019,
  url =		 {https://doi.org/10.1016/j.artint.2018.12.004},
  abstract =	 {Datalog is a rule-based formalism that can
                  axiomatise recursive properties such as reachability
                  and transitive closure. Datalog implementations
                  often materialise (i.e., precompute and store) all
                  facts entailed by a Datalog program and a set of
                  explicit facts.  Queries can thus be answered
                  directly in the materialised facts, which is
                  beneficial to the performance of query answering,
                  but the materialised facts must be updated whenever
                  the explicit facts change. Rematerialising all facts
                   `from scratch' can be very inefficient, so numerous
                  materialisation maintenance algorithms have been
                  developed that aim to efficiently identify the facts
                  that require updating and thus reduce the overall
                  work. Most such approaches are variants of the
                  counting or Delete/Rederive (DRed)
                  algorithms. Algorithms in the former group maintain
                  additional data structures and are usually
                  applicable only if Datalog rules are not recursive,
                  which limits their applicability in
                  practice. Algorithms in the latter group do not
                  require additional data structures and can handle
                  recursive rules, but they can be inefficient when
                  facts have multiple derivations. Finally, to the
                  best of our knowledge, these approaches have not
                  been compared and their practical applicability has
                  not been investigated.  Datalog is becoming
                  increasingly important in practice, so a more
                  comprehensive understanding of the tradeoffs between
                  different approaches to materialisation maintenance
                  is needed. In this paper we present three such
                  algorithms for Datalog with stratified negation: a
                  new counting algorithm that can handle recursive
                  rules, an optimised variant of the DRed algorithm
                  that does not repeat derivations, and a new
                  Forward/Backward/Forward (FBF) algorithm that
                  extends DRed to better handle facts with multiple
                  derivations. Furthermore, we study the worst-case
                  performance of these algorithms and compare the
                   algorithms' behaviour on several examples. Finally,
                  we present the results of an extensive,
                  first-of-a-kind empirical evaluation in which we
                  investigate the robustness and the scaling behaviour
                  of our algorithms. We thus provide important
                  theoretical and practical insights into all three
                  algorithms that will provide invaluable guidance to
                  future implementors of Datalog systems.},
  url2 =         {https://www.cs.ox.ac.uk/people/boris.motik/pubs/mnph19maintenance-revisited.pdf}
}

@article{zaniolo-tplp17,
  title =	 {Fixpoint semantics and optimization of recursive
                  datalog programs with aggregates},
  author =	 {Zaniolo, Carlo and Yang, Mohan and Das, Ariyam and
                  Shkapsky, Alexander and Condie, Tyson and
                  Interlandi, Matteo},
  journal =	 {Theory and Practice of Logic Programming},
  volume =	 17,
  number =	 {5-6},
  pages =	 {1048--1065},
  year =	 2017,
  publisher =	 {Cambridge University Press},
  abstract =	 {A very desirable Datalog extension investigated by
                  many researchers in the last thirty years consists
                  in allowing the use of the basic SQL aggregates min,
                  max, count and sum in recursive rules. In this
                  paper, we propose a simple comprehensive solution
                  that extends the declarative least-fixpoint
                  semantics of Horn Clauses, along with the
                  optimization techniques used in the bottom-up
                  implementation approach adopted by many Datalog
                  systems. We start by identifying a large class of
                  programs of great practical interest in which the
                  use of min or max in recursive rules does not
                  compromise the declarative fixpoint semantics of the
                  programs using those rules. Then, we revisit the
                  monotonic versions of count and sum aggregates
                  proposed in (Mazuran et al. 2013b) and named,
                  respectively, mcount and msum. Since mcount, and
                  also msum on positive numbers, are monotonic in the
                  lattice of set-containment, they preserve the
                  fixpoint semantics of Horn Clauses. However, in many
                  applications of practical interest, their use can
                  lead to inefficiencies, that can be eliminated by
                  combining them with max, whereby mcount and msum
                  become the standard count and sum. Therefore, the
                  semantics and optimization techniques of Datalog are
                  extended to recursive programs with min, max, count
                  and sum, making possible the advanced applications
                  of superior performance and scalability demonstrated
                  by BigDatalog (Shkapsky et al. 2016) and Datalog-MC
                  (Yang et al. 2017). }
}

@inproceedings{motik-aaai15,
  author    = {Boris Motik and
               Yavor Nenov and
               Robert Edgar Felix Piro and
               Ian Horrocks},
  title     = {Incremental Update of {Datalog} Materialisation: the Backward/Forward
               Algorithm},
  booktitle = AAAI,
  month =     {January 25-30},
  address =   {Austin, Texas},
  pages     = {1560--1568},
  publisher = {{AAAI} Press},
  year      = {2015},
  url       = {http://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/view/9660},
}

@inproceedings{gupta-sigmod93,
  author =	 {Ashish Gupta and Inderpal Singh Mumick and
                  V. S. Subrahmanian},
  title =	 {Maintaining Views Incrementally},
  booktitle =	 SIGMOD,
  address =	 {Washington, DC},
  month =	 {May 26-28},
  pages =	 {157--166},
  publisher =	 {{ACM} Press},
  year =	 1993,
  url =		 {https://doi.org/10.1145/170035.170066},
  doi =		 {10.1145/170035.170066},
  comments =	 {DRed algorithm}
}

@inproceedings{Ceri-VLDB91,
  author =	 {Stefano Ceri and Jennifer Widom},
  editor =	 {Guy M. Lohman and Am{\'{\i}}lcar Sernadas and Rafael
                  Camps},
  title =	 {Deriving Production Rules for Incremental View
                  Maintenance},
  booktitle =	 VLDB,
  address =	 {Barcelona, Spain},
  pages =	 {577--589},
  year =	 1991,
  url =		 {http://www.vldb.org/conf/1991/P577.PDF},
}

@inproceedings{Wolfson-sigmod91,
  author =	 {Ouri Wolfson and Hasanat M. Dewan and Salvatore
                  J. Stolfo and Yechiam Yemini},
  title =	 {Incremental Evaluation of Rules and its Relationship
                  to Parallelism},
  booktitle =	 SIGMOD,
  address =	 {Denver, Colorado},
  month =	 {May 29-31},
  pages =	 {78--87},
  publisher =	 {{ACM} Press},
  year =	 1991,
  url =		 {https://doi.org/10.1145/115790.115799},
  doi =		 {10.1145/115790.115799},
}

@inproceedings{Staudt-vldb96,
  author =	 {Martin Staudt and Matthias Jarke},
  title =	 {Incremental Maintenance of Externally Materialized
                  Views},
  booktitle =	 VLDB,
  address =	 {Mumbai (Bombay), India},
  month =	 {September 3-6},
  pages =	 {75--86},
  year =	 1996,
  url =		 {http://www.vldb.org/conf/1996/P075.PDF},
}

@inproceedings{Kotowski-rr11,
  author =	 {Jakub Kotowski and Fran{\c{c}}ois Bry and Simon
                  Brodt},
  title =	 {Reasoning as Axioms Change - Incremental View
                  Maintenance Reconsidered},
  booktitle =	 {Web Reasoning and Rule Systems {RR}},
  address =	 {Galway, Ireland},
  month =	 {August 29-30},
  series =	 {Lecture Notes in Computer Science},
  volume =	 6902,
  pages =	 {139--154},
  publisher =	 {Springer},
  year =	 2011,
  url =		 {https://doi.org/10.1007/978-3-642-23580-1_11},
  doi =		 {10.1007/978-3-642-23580-1_11},
}

@inproceedings{Lu-sigmod95,
  author =	 {James J. Lu and Guido Moerkotte and Joachim
                  Sch{\"{u}} and V. S. Subrahmanian},
  title =	 {Efficient Maintenance of Materialized Mediated
                  Views},
  booktitle =	 SIGMOD,
  address =	 {San Jose, California},
  month =	 {May 22-25},
  pages =	 {340--351},
  year =	 1995,
  url =		 {https://doi.org/10.1145/223784.223850},
  doi =		 {10.1145/223784.223850},
}

@article{Dewan-iis92,
  author    = {Hasanat M. Dewan and
               David Ohsie and
               Salvatore J. Stolfo and
               Ouri Wolfson and
               Sushil Da Silva},
  title     = {Incremental Database Rule Processing In {PARADISER}},
  journal   = {J. Intell. Inf. Syst.},
  volume    = {1},
  number    = {2},
  pages     = {177--209},
  year      = {1992},
  url       = {https://doi.org/10.1007/BF00962282},
  doi       = {10.1007/BF00962282},
}

@inproceedings{Apt-sigmod87,
  author =	 {Krzysztof R. Apt and Jean{-}Marc Pugin},
  editor =	 {Moshe Y. Vardi},
  title =	 {Maintenance of Stratified Databases Viewed as a
                  Belief Revision System},
  booktitle =	 SIGMOD,
  month =	 {March 23-25},
  address =	 {San Diego, California},
  pages =	 {136--145},
  year =	 1987,
  url =		 {https://doi.org/10.1145/28659.28674},
  doi =		 {10.1145/28659.28674},
}

@inproceedings{Harrison-wdd92,
  author =	 {John V. Harrison and Suzanne W. Dietrich},
  editor =	 {Kotagiri Ramamohanarao and James Harland and Guozhu
                  Dong},
  title =	 {Maintenance of Materialized Views in a Deductive
                  Database: An Update Propagation Approach},
  booktitle =	 {Workshop on Deductive Databases},
  address =	 {Washington, D.C.},
  month =	 {November 14},
  series =	 {Technical Report},
  volume =	 {{CITRI/TR-92-65}},
  pages =	 {56--65},
  publisher =	 {Department of Computer Science, University of
                  Melbourne},
  year =	 1992,
}

@Book{rabiner-book75,
  author = 	 {Rabiner, Lawrence R. and Gold, Bernard},
  title = 	 {Theory and Application of Digital Signal Processing},
  publisher = 	 {Prentice-Hall},
  year = 	 1975}

@inproceedings{Hammer-pldi09,
  author =	 {Matthew A. Hammer and Umut A. Acar and Yan Chen},
  editor =	 {Michael Hind and Amer Diwan},
  title =	 {{CEAL:} a {C}-based language for self-adjusting
                  computation},
  booktitle =	 PLDI,
  address =	 {Dublin, Ireland},
  month =	 {June 15-21},
  pages =	 {25--37},
  publisher =	 {{ACM}},
  year =	 2009,
  url =		 {https://doi.org/10.1145/1542476.1542480},
  doi =		 {10.1145/1542476.1542480},
}

@inproceedings{Cai-pldi14,
  author =	 {Yufei Cai and Paolo G. Giarrusso and Tillmann Rendel
                  and Klaus Ostermann},
  title =	 {A theory of changes for higher-order languages:
                  incrementalizing {\(\lambda\)}-calculi by static
                  differentiation},
  booktitle =	 PLDI,
  address =	 {Edinburgh, United Kingdom},
  month =	 {June 9-11},
  pages =	 {145--155},
  publisher =	 {{ACM}},
  year =	 2014,
  url =		 {https://doi.org/10.1145/2594291.2594304},
  doi =		 {10.1145/2594291.2594304},
}

@inproceedings{jafarpour-edbt19,
  title =	 {{KSQL}: Streaming {SQL} Engine for {Apache Kafka}},
  author =	 {Jafarpour, Hojjat and Desai, Rohan and Guy, Damian},
  booktitle =	 {International Conference on Extending Database
                  Technology (EDBT)},
  address =	 {Lisbon, Portugal},
  month =	 {March 26-29},
  pages =	 {524--533},
  year =	 2019,
  url =          {http://openproceedings.org/2019/conf/edbt/EDBT19_paper_329.pdf},
  abstract =	 {Demand for real-time stream processing has been
                  increasing and Apache Kafka has become the de-facto
                  streaming data platform in many organizations. Kafka
                  Streams API along with several other open source
                  stream processing systems can be used to process the
                  streaming data in Kafka, however, these systems have
                  very high barrier of entry and require programming
                  in languages such as Java or Scala.  In this paper,
                  we present KSQL, a streaming SQL engine for Apache
                  Kafka. KSQL provides a simple and completely
                  interactive SQL interface for stream processing on
                  Apache Kafka; no need to write code in a programming
                  language such as Java or Python.  KSQL is
                  open-source, distributed, scalable, reliable, and
                  real-time.  It supports a wide range of powerful
                  stream processing operations including aggregations,
                  joins, windowing, sessionization, and much more. It
                  is extensible using User Defined Functions (UDFs)
                  and User Defined Aggregate Functions (UDAFs). KSQL
                  is implemented on Kafka Streams API which means it
                  provides exactly once delivery guarantee, linear
                  scalability, fault tolerance and can run as a
                  library without requiring a separate cluster.}
}


@inproceedings{graefe-sigmod90,
  author =	 {Goetz Graefe},
  title =	 {Encapsulation of parallelism in the {Volcano} query
                  processing system},
  booktitle =	 {SIGMOD '90: Proceedings of the 1990 ACM SIGMOD
                  international conference on Management of data},
  year =	 1990,
  isbn =	 {0-89791-365-5},
  pages =	 {102--111},
  location =	 {Atlantic City, New Jersey, United States},
  doi =		 {10.1145/93597.98720},
  publisher =	 {ACM Press},
  address =	 {New York, NY, USA},
  abstract =	 {Volcano is a new dataflow query processing system we
                  have developed for database systems research and
                  education. The uniform interface between operators
                  makes Volcano extensible by new operators. All
                  operators are designed and coded as if they were
                  meant for a single-process system only. When
                  attempting to parallelize Volcano, we had to choose
                  between two models of parallelization, called here
                  the bracket and operator models. We describe the
                  reasons for not choosing the bracket model,
                  introduce the novel operator model, and provide
                  details of Volcano's exchange operator that
                  parallelizes all other operators. It allows
                  intra-operator parallelism on partitioned datasets
                  and both vertical and horizontal inter-operator
                  parallelism. The exchange operator encapsulates all
                  parallelism issues and therefore makes
                  implementation of parallel database algorithms
                  significantly easier and more robust. Included in
                  this encapsulation is the translation between
                  demand-driven dataflow within processes and
                  data-driven dataflow between processes. Since the
                  interface between Volcano operators is similar to
                  the one used in ``real,'' commercial systems, the
                  techniques described here can be used to parallelize
                  other query processing engines.},
  comments =	 {This paper seems to be about self-scheduling
                  distributed data-flow computations of SQL
                  queries. The problem with this approach is that it
                  assumes fault-free computation, and that any failure
                  will make recovery impossible. There seems to be no
                  data-driven scheduling: I could not figure out how
                  the decision on which parts of the query are
                  parallelized and which are run sequentially is made
                  (this decision is made dynamically, but it could be
                  completely suboptimal -- In fact, there is no query
                  execution plan). There is practically no evaluation,
                  except one microbenchmark, but he cites 4 other
                  papers which have evaluations (he doesn't summarize
                  the results from the other studies, unfortunately).
                  The main contribution of the paper seems to be a
                   transformation of distributed computation into the
                  traditional iterator "pull" model.},
}

@inproceedings{idris-sigmod17,
  author =	 {Idris, Muhammad and Ugarte, Martin and Vansummeren,
                  Stijn},
  title =	 {The Dynamic Yannakakis Algorithm: Compact and
                  Efficient Query Processing Under Updates},
  year =	 2017,
  url =		 {https://doi.org/10.1145/3035918.3064027},
  abstract =	 {Modern computing tasks such as real-time analytics
                  require refresh of query results under high update
                  rates. Incremental View Maintenance (IVM) approaches
                  this problem by materializing results in order to
                  avoid recomputation. IVM naturally induces a
                  trade-off between the space needed to maintain the
                  materialized results and the time used to process
                  updates. In this paper, we show that the full
                  materialization of results is a barrier for more
                  general optimization strategies. In particular, we
                  present a new approach for evaluating queries under
                  updates. Instead of the materialization of results,
                  we require a data structure that allows: (1) linear
                  time maintenance under updates, (2) constant-delay
                  enumeration of the output, (3) constant-time lookups
                  in the output, while (4) using only linear space in
                  the size of the database. We call such a structure a
                  Dynamic Constant-delay Linear Representation (DCLR)
                  for the query. We show that DYN, a dynamic version
                  of the Yannakakis algorithm, yields DCLRs for the
                  class of free-connex acyclic CQs. We show that this
                  is optimal in the sense that no DCLR can exist for
                  CQs that are not free-connex acyclic. Moreover, we
                  identify a sub-class of queries for which DYN
                  features constant-time update per tuple and show
                  that this class is maximal. Finally, using the TPC-H
                  and TPC-DS benchmarks, we experimentally compare DYN
                  and a higher-order IVM (HIVM) engine. Our approach
                  is not only more efficient in terms of memory
                  consumption (as expected), but is also consistently
                  faster in processing updates.},
  booktitle =	 SIGMOD,
  pages =	 {1259--1274},
  numpages =	 16,
  keywords =	 {incremental view maintenance, acyclic joins, dynamic
                  query processing},
  location =	 {Chicago, Illinois, USA}
}

@article{kara-tds20,
  author =	 {Kara, Ahmet and Ngo, Hung Q. and Nikolic, Milos and
                  Olteanu, Dan and Zhang, Haozhe},
  title =	 {Maintaining Triangle Queries under Updates},
  year =	 2020,
  issue_date =	 {September 2020},
  volume =	 45,
  number =	 3,
  issn =	 {0362-5915},
  url =		 {https://doi.org/10.1145/3396375},
  abstract =	 {We consider the problem of incrementally maintaining
                  the triangle queries with arbitrary free variables
                   under single-tuple updates to the input relations. We
                  introduce an approach called IVMϵ that exhibits a
                  trade-off between the update time, the space, and
                  the delay for the enumeration of the query result,
                  such that the update time ranges from the square
                  root to linear in the database size while the delay
                   ranges from constant to linear time. IVMϵ achieves
                  Pareto worst-case optimality in the update-delay
                  space conditioned on the Online Matrix-Vector
                  Multiplication conjecture. It is strongly Pareto
                  optimal for the triangle queries with no or three
                  free variables and weakly Pareto optimal for the
                  remaining triangle queries with one or two free
                   variables. IVMϵ recovers prior work such as the
                  suboptimal classical view maintenance approach that
                  uses delta query processing and the worst-case
                  optimal approach that computes all triangles in a
                  static database.},
  journal =	 {ACM Trans. Database Syst.},
  month =	 aug,
  articleno =	 11,
  numpages =	 46,
  keywords =	 {amortized update time, enumeration delay, complexity
                  trade-off, Pareto worst-case optimality, Incremental
                  view maintenance}
}


@inproceedings{nikolic-icmd18,
  author =	 {Nikolic, Milos and Olteanu, Dan},
  title =	 {Incremental View Maintenance with Triple Lock
                  Factorization Benefits},
  year =	 2018,
  doi =		 {10.1145/3183713.3183758},
  url =		 {https://doi.org/10.1145/3183713.3183758},
  abstract =	 {We introduce F-IVM, a unified incremental view
                  maintenance (IVM) approach for a variety of tasks,
                  including gradient computation for learning linear
                  regression models over joins, matrix chain
                  multiplication, and factorized evaluation of
                  conjunctive queries. F-IVM is a higher-order IVM
                  algorithm that reduces the maintenance of the given
                  task to the maintenance of a hierarchy of
                  increasingly simpler views. The views are functions
                  mapping keys, which are tuples of input data values,
                  to payloads, which are elements from a task-specific
                  ring. Whereas the computation over the keys is the
                  same for all tasks, the computation over the
                  payloads depends on the task. F-IVM achieves
                  efficiency by factorizing the computation of the
                  keys, payloads, and updates. We implemented F-IVM as
                  an extension of DBToaster. We show in a range of
                  scenarios that it can outperform classical
                  first-order IVM, DBToaster's fully recursive
                  higher-order IVM, and plain recomputation by orders
                  of magnitude while using less memory.},
  booktitle =	 SIGMOD,
  pages =	 {365–380},
  numpages =	 16,
  keywords =	 {factorized representation, query optimization,
                  stream processing, materialized views, incremental
                  view maintenance, rings},
  location =	 {Houston, TX, USA}
}


@InProceedings{foster-planx08,
  author =	 {J. Nathan Foster and Ravi Konuru and J{\'e}r{\^o}me Sim{\'e}on and
                  Lionel Villard},
  title =	 {An Algebraic Approach to {XQuery} View Maintenance},
  booktitle =	 {ACM SIGPLAN Workshop on Programming Languages
                  Technologies for {XML}},
  year =	 2008,
  month =	 jan,
  address =	 {San Francisco, CA},
  abstract =	 {View maintenance is a problem in data management
                  that arises whenever a view is materialized over a
                  source that changes over time. When the source is
                  large, or when the source and view reside on
                  different hosts, it is not practical to recompute
                  the view and retransmit it over the network each
                  time the source is updated. A better idea, commonly
                  used in systems built with view maintenance in mind,
                  is to translate source updates to ones that can be
                  applied to the view directly. The cost of
                  calculating, transmitting, and applying a translated
                  update is typically dramatically less than the cost
                  of recomputing and retransmitting the entire view.
                  This paper addresses the problem of maintaining
                  XQuery views over XML data. The core algorithm
                  translates updates through queries as expressed in
                  the tree algebra used internally in the Galax
                  engine. This algorithm extends previous work on
                  maintenance for relational views, although there are
                  significant complications due to the data model,
                  which is both ordered and nested. To overcome these
                  obstacles, we propose a scheme for storing auxiliary
                  data that guides the translation of updates in this
                  more complicated setting.  A novel aspect of our
                  approach compared to previous work is that the
                  amount and content of annotations can be controlled
                  by users, making it possible to balance the
                  tradeoffs between the size of the auxiliary data and
                  the quality of translated updates.  We have built a
                  prototype implementation to test these ideas.  Our
                  system extends Galax, and handles a core set of
                  operators and built-in functions capable of
                  expressing many typical first-order queries. Its
                  design is fully compositional, so it can easily be
                  extended to new operators. We present preliminary
                  results of experiments run on benchmark queries from
                  the XMark suite.}
}

@inproceedings{begoli-icmd18,
  author =	 {Begoli, Edmon and Camacho-Rodr\'{\i}guez, Jes\'{u}s
                  and Hyde, Julian and Mior, Michael J. and Lemire,
                  Daniel},
  title =	 {{Apache} {Calcite}: A Foundational Framework for
                  Optimized Query Processing Over Heterogeneous Data
                  Sources},
  year =	 2018,
  doi =		 {10.1145/3183713.3190662},
  url =		 {https://doi.org/10.1145/3183713.3190662},
  abstract =	 {Apache Calcite is a foundational software framework
                  that provides query processing, optimization, and
                  query language support to many popular open-source
                  data processing systems such as Apache Hive, Apache
                  Storm, Apache Flink, Druid, and MapD. The goal of
                  this paper is to formally introduce Calcite to the
                  broader research community, briefly present its
                  history, and describe its architecture, features,
                  functionality, and patterns for adoption. Calcite's
                  architecture consists of a modular and extensible
                  query optimizer with hundreds of built-in
                  optimization rules, a query processor capable of
                  processing a variety of query languages, an adapter
                  architecture designed for extensibility, and support
                  for heterogeneous data models and stores
                  (relational, semi-structured, streaming, and
                  geospatial). This flexible, embeddable, and extensible
                  architecture is what makes Calcite an attractive
                  choice for adoption in big-data frameworks. It is an
                  active project that continues to introduce support
                  for the new types of data sources, query languages,
                  and approaches to query processing and
                  optimization.},
  booktitle =	 SIGMOD,
  pages =	 {221–230},
  numpages =	 10,
  keywords =	 {data management, modular query optimization, query
                  algebra, relational semantics, storage adapters,
                  apache calcite},
  location =	 {Houston, TX, USA}
}


@Misc{sqllogictest,
  title        = {sqllogictest},
  howpublished = {\url{https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki}},
  note         = {Retrieved December 2022},
  abstract     = {Sqllogictest is a program designed to verify that an
                  SQL database engine computes correct results by
                  comparing the results to identical queries from other
                  SQL database engines. Sqllogictest was originally
                  designed to test SQLite, but it is database engine
                  neutral and can just as easily be used to test other
                  database products.}
}

@article{tangwongsan-vldb15,
  author =	 {Tangwongsan, Kanat and Hirzel, Martin and Schneider,
                  Scott and Wu, Kun-Lung},
  title =	 {General Incremental Sliding-Window Aggregation},
  year =	 2015,
  issue_date =	 {February 2015},
  publisher =	 {VLDB Endowment},
  volume =	 8,
  number =	 7,
  issn =	 {2150-8097},
  doi =		 {10.14778/2752939.2752940},
  url =		 {https://doi.org/10.14778/2752939.2752940},
  abstract =	 {Stream processing is gaining importance as more data
                  becomes available in the form of continuous streams
                  and companies compete to promptly extract insights
                  from them. In such applications, sliding-window
                  aggregation is a central operator, and incremental
                  aggregation helps avoid the performance penalty of
                  re-aggregating from scratch for each window
                  change. This paper presents Reactive Aggregator (RA),
                  a new framework for incremental sliding-window
                  aggregation. RA is general in that it does not
                  require aggregation functions to be invertible or
                  commutative, and it does not require windows to be
                  FIFO. We implemented RA as a drop-in replacement for
                  the Aggregate operator of a commercial streaming
                  engine. Given m updates on a window of size n, RA
                  has an algorithmic complexity of O(m + m log (n/m)),
                  rivaling the best prior algorithms for any
                  m. Furthermore, RA's implementation minimizes
                  overheads from allocation and pointer traversals by
                  using a single flat array.},
  journal =	 {Proc. VLDB Endow.},
  month =	 feb,
  pages =	 {702–713},
  numpages =	 12
}


@inproceedings{abeysinghe-sigmod22,
  author =	 {Abeysinghe, Supun and He, Qiyang and Rompf, Tiark},
  title =	 {Efficient Incrementialization of Correlated Nested
                  Aggregate Queries Using Relative Partial Aggregate
                  Indexes (RPAI)},
  year =	 2022,
  doi =		 {10.1145/3514221.3517889},
  url =		 {https://doi.org/10.1145/3514221.3517889},
  abstract =	 {Incrementalization of queries is imperative in cases
                  where data arrives as streams and output is
                  latency-critical and/or desired before the full data
                  has been received. Incremental execution computes
                  the output at a given time by reusing the previously
                  computed outputs or maintained views rather than
                  re-evaluating the query from scratch. There are
                  various approaches to perform this
                  incrementalization ranging from query-specific
                  algorithms and data structures (e.g., DYN, AJU) to
                  general systems (e.g., DBToaster,
                  Materialize). DBToaster is a state-of-the-art system
                  that comes with an appealing theoretical background
                  based on the idea of applying Incremental View
                  Maintenance (IVM) recursively, maintaining a
                  hierarchy of materialized views via delta
                  queries. However, one key limitation of this
                  approach is its inability to efficiently
                  incrementalize correlated nested-aggregate queries
                  due to an inefficient delta rule for such
                  queries. Moreover, none of the other specialized
                  approaches have shown efficient ways to optimize
                  such queries either. Nonetheless, these types of
                  queries can be found in many real-world application
                  domains (e.g., finance), for which efficient
                  incrementalization remains a crucial open
                  problem. In this work, we propose an approach to
                  incrementalize such queries based on a novel
                  tree-based index structure called Relative Partial
                  Aggregate Indexes (RPAI). Our approach is
                  asymptotically faster than other systems and shows
                  up to 1100\texttimes{} speedups in workloads of
                  practical importance.},
  booktitle =	 SIGMOD,
  pages =	 {136–149},
  numpages =	 14,
  keywords =	 {nested aggregate queries, aggregate indexes,
                  incremental processing, parent-relative trees},
  location =	 {Philadelphia, PA, USA}
}

@book{chirkova-book12,
  author    = {Chirkova, Rada and Yang, Jun},
  title     = {Materialized Views},
  publisher = {Now Publishers Inc.},
  address   = {Hanover, MA, USA},
  year      = 2012,
  isbn      = {160198622X},
  abstract  = {Materialized views are a natural embodiment of the ideas
               of precomputation and caching in databases. Instead of
               computing a query from scratch, a system can use results
               that have already been computed, stored, and kept in
               sync with database updates. The ability of materialized
               views to speed up queries benefits most database
               applications, ranging from traditional querying and
               reporting to web database caching, online analytical
               processing, and data mining. By reducing dependency on
               the availability of base data, materialized views have
               also laid much of the foundation for information
               integration and data warehousing systems. The database
               tradition of declarative querying distinguishes
               materialized views from generic applications of
               precomputation and caching in other contexts, and makes
               materialized views especially interesting, powerful, and
               challenging at the same time. Study of materialized
               views has generated a rich research literature and
               mature commercial implementations, aimed at providing
               efficient, effective, automated, and general solutions
               to the selection, use, and maintenance of materialized
               views. This monograph provides an accessible
               introduction and reference to materialized views,
               explains its core ideas, highlights its recent
               developments, and points out its sometimes subtle
               connections to other research topics in databases.}
}


@inproceedings{chaudhuri-icde95,
  author    = {Chaudhuri, Surajit and Krishnamurthy, Ravi and
               Potamianos, Spyros and Shim, Kyuseok},
  title     = {Optimizing Queries with Materialized Views},
  booktitle = ICDE,
  year      = 1995,
  pages     = {190–200},
  numpages  = 11,
  keywords  = {query processing, materialized views, query
               optimization algorithm, optimisation},
  abstract  = {While much work has addressed the problem of maintaining
               materialized views, the important question of optimizing
               queries in the presence of materialised views has not
               been resolved. In this paper, we analyze the
               optimization question and provide a comprehensive and
               efficient solution. Our solution has the desirable
               property that it is a simple generalization of the
               traditional query optimization algorithm.}
}

@article{idris-sigmod19,
  author =	 {Idris, Muhammad and Ugarte, Mart\'{\i}n and
                  Vansummeren, Stijn and Voigt, Hannes and Lehner,
                  Wolfgang},
  title =	 {Efficient Query Processing for Dynamically Changing
                  Datasets},
  year =	 2019,
  issue_date =	 {March 2019},
  volume =	 48,
  number =	 1,
  doi =		 {10.1145/3371316.3371325},
  url =		 {https://doi.org/10.1145/3371316.3371325},
  abstract =	 {The ability to efficiently analyze changing data is
                  a key requirement of many real-time analytics
                  applications. Traditional approaches to this problem
                  were developed around the notion of Incremental View
                  Maintenance (IVM), and are based either on the
                  materialization of subresults (to avoid their
                  recomputation) or on the recomputation of subresults
                  (to avoid the space overhead of
                  materialization). Both techniques are suboptimal:
                  instead of materializing results and subresults, one
                  may also maintain a data structure that supports
                  efficient maintenance under updates and from which
                  the full query result can quickly be enumerated. In
                  two previous articles, we have presented algorithms
                  for dynamically evaluating queries that are easy to
                  implement, efficient, and can be naturally extended
                  to evaluate queries from a wide range of application
                  domains. In this paper, we discuss our algorithm and
                  its complexity, explaining the main components
                  behind its efficiency. Finally, we show experiments
                  that compare our algorithm to a state-of-the-art
                  (Higher-order) IVM engine, as well as to a prominent
                  complex event recognition engine. Our approach
                  outperforms the competitor systems by up to two
                  orders of magnitude in processing time, and one
                  order in memory consumption.},
  journal =	 {SIGMOD Rec.},
  month =	 nov,
  pages =	 {33–40},
  numpages =	 8
}

@article{idris-vldb18,
  author =	 {Idris, Muhammad and Ugarte, Mart\'{\i}n and
                  Vansummeren, Stijn and Voigt, Hannes and Lehner,
                  Wolfgang},
  title =	 {Conjunctive Queries with Inequalities under Updates},
  year =	 2018,
  issue_date =	 {March 2018},
  publisher =	 {VLDB Endowment},
  volume =	 11,
  number =	 7,
  issn =	 {2150-8097},
  doi =		 {10.14778/3192965.3192966},
  url =		 {https://doi.org/10.14778/3192965.3192966},
  abstract =	 {Modern application domains such as Composite Event
                  Recognition (CER) and real-time Analytics require
                  the ability to dynamically refresh query results
                  under high update rates. Traditional approaches to
                  this problem are based either on the materialization
                  of subresults (to avoid their recomputation) or on
                  the recomputation of subresults (to avoid the space
                  overhead of materialization). Both techniques have
                  recently been shown suboptimal: instead of
                  materializing results and subresults, one can
                  maintain a data structure that supports efficient
                  maintenance under updates and can quickly enumerate
                  the full query output, as well as the changes
                  produced under single updates. Unfortunately, these
                  data structures have been developed only for
                  aggregate-join queries composed of equi-joins,
                  limiting their applicability in domains such as CER
                  where temporal joins are commonplace. In this paper,
                  we present a new approach for dynamically evaluating
                  queries with multi-way θ-joins under updates that is
                  effective in avoiding both materialization and
                  recomputation of results, while supporting a wide
                  range of applications. To do this we generalize
                  Dynamic Yannakakis, an algorithm for dynamically
                  processing acyclic equi-join queries. In tandem, and
                  of independent interest, we generalize the notions
                  of acyclicity and free-connexity to arbitrary
                  θ-joins. We instantiate our framework to the case
                  where θ-joins are only composed of equalities and
                  inequalities (<, ≤, =, >, ≥) and
                  experimentally compare this algorithm, called IEDyn,
                  to state of the art CER systems as well as
                  incremental view maintenance engines. IEDyn performs
                  consistently better than the competitor systems with
                  up to two orders of magnitude improvements in both
                  time and memory consumption.},
  journal =	 {Proc. VLDB Endow.},
  month =	 mar,
  pages =	 {733–745},
  numpages =	 13
}

@inproceedings{griffin-sigmod95,
  author =	 {Griffin, Timothy and Libkin, Leonid},
  title =	 {Incremental Maintenance of Views with Duplicates},
  year =	 1995,
  doi =		 {10.1145/223784.223849},
  url =		 {https://doi.org/10.1145/223784.223849},
  abstract =	 {We study the problem of efficient maintenance of
                  materialized views that may contain duplicates. This
                  problem is particularly important when queries
                  against such views involve aggregate functions,
                  which need duplicates to produce correct
                  results. Unlike most work on the view maintenance
                  problem that is based on an algorithmic approach,
                  our approach is algebraic and based on equational
                  reasoning. This approach has a number of advantages:
                  it is robust and easily extendible to new language
                  constructs, it produces output that can be used by
                  query optimizers, and it simplifies correctness
                  proofs. We use a natural extension of the relational
                  algebra operations to bags (multisets) as our basic
                  language. We present an algorithm that propagates
                  changes from base relations to materialized
                  views. This algorithm is based on reasoning about
                  equivalence of bag-valued expressions. We prove that
                  it is correct and preserves a certain notion of
                  minimality that ensures that no unnecessary tuples
                  are computed. Although it is generally only a
                  heuristic that computing changes to the view rather
                  than recomputing the view from scratch is more
                  efficient, we prove results saying that under normal
                  circumstances one should expect the change
                  propagation algorithm to be significantly faster and
                  more space efficient than complete recomputing of
                  the view. We also show that our approach interacts
                  nicely with aggregate functions, allowing their
                  correct evaluation on views that change.},
  booktitle =	 SIGMOD,
  pages =	 {328–339},
  numpages =	 12,
  location =	 {San Jose, California, USA}
}

@inproceedings{wang-sigmod20,
  author =	 {Wang, Qichen and Yi, Ke},
  title =	 {Maintaining Acyclic Foreign-Key Joins under Updates},
  year =	 2020,
  doi =		 {10.1145/3318464.3380586},
  url =		 {https://doi.org/10.1145/3318464.3380586},
  abstract =	 {A large number of analytical queries (e.g., all the
                  22 queries in the TPC-H benchmark) are based on
                  acyclic foreign-key joins. In this paper, we study
                  the problem of incrementally maintaining the query
                  results of these joins under updates, i.e.,
                  insertion and deletion of tuples to any of the
                  relations. Prior work has shown that this problem is
                  inherently hard, requiring at least
                  $\Omega(|db|^{1/2-\epsilon})$
                  time per update, where $|db|$ is the size of the
                  database, and $\epsilon > 0$ can be any small
                  constant. However, this negative result holds only
                  on adversarially constructed update sequences; on
                  the other hand, most real-world update sequences are
                  "nice", nowhere near these worst-case scenarios. We
                  introduce a measure $\lambda$, which we call the
                  enclosureness of the update sequence, to more
                  precisely characterize its intrinsic difficulty. We
                  present an algorithm to maintain the query results
                  of any acyclic foreign-key join in $O(\lambda)$ time
                  amortized, on any update sequence whose
                  enclosureness is $\lambda$. This is complemented with a
                  lower bound of $\Omega(\lambda^{1-\epsilon})$, showing that our algorithm
                  is essentially optimal with respect to $\lambda$. Next,
                  using this algorithm as the core component, we show
                  how all the 22 queries in the TPC-H benchmark can be
                  supported in $\tilde{O}(\lambda)$ time. Finally, based on
                  the algorithms developed, we built a continuous
                  query processing system on top of Flink, and
                  experimental results show that our system
                  outperforms previous ones significantly.},
  booktitle =	 SIGMOD,
  pages =	 {1225–1239},
  numpages =	 15,
  keywords =	 {query evaluation under updates, acyclic joins,
                  incremental view maintenance, sliding windows},
  location =	 {Portland, OR, USA}
}

@InProceedings{moura-cade15,
  author =	 {Leonardo de Moura and Soonho Kong and Jeremy Avigad
                  and Floris van Doorn and Jakob von Raumer},
  title =	 {The {Lean} Theorem Prover},
  booktitle =	 {International Conference on Automated Deduction
                  (CADE-25)},
  year =	 2015,
  address =	 {Berlin, Germany},
  abstract =	 {Lean is a new open source theorem prover being
                  developed at Microsoft Research and Carnegie Mellon
                  University, with a small trusted kernel based on
                  dependent type theory. It aims to bridge the gap
                  between interactive and automated theorem proving,
                  by situating automated tools and methods in a
                  framework that supports user interaction and the
                  construction of fully specified axiomatic
                  proofs. Lean is an ongoing and long-term effort, but
                  it already provides many useful components,
                  integrated development environments, and a rich API
                  which can be used to embed it into other systems. It
                  is currently being used to formalize category
                  theory, homotopy type theory, and abstract algebra.
                  We describe the project goals, system architecture,
                  and main features, and we discuss applications and
                  continuing work.}
}


@article{bonifati-iclp2018,
  title =        {Certified Graph View Maintenance with Regular
                  {Datalog}},
  author =       {Bonifati, Angela and Dumbrava, Stefania and Arias,
                  Emilio Jes\'{u}s Gallego},
  volume =       18,
  doi =          {10.1017/S1471068418000224},
  number =       {3-4},
  journal =      {Theory and Practice of Logic Programming},
  publisher =    {Cambridge University Press},
  year =         2018,
  pages =        {372–389},
  abstract =     {We employ the Coq proof assistant to develop a
                  mechanically-certified framework for evaluating
                  graph queries and incrementally maintaining
                  materialized graph instances, also called views. The
                  language we use for defining queries and views is
                  Regular Datalog (RD) – a notable fragment of
                  non-recursive Datalog that can express complex
                  navigational queries, with transitive closure as
                  native operator. We first design and encode the
                  theory of RD and then mechanize a RD-specific
                  evaluation algorithm capable of fine-grained,
                  incremental graph view computation, which we prove
                  sound with respect to the declarative RD
                  semantics. By using the Coq extraction mechanism,
                  we test an OCaml version of the verified engine on a
                  set of preliminary benchmarks. Our development is
                  particularly focused on leveraging existing
                  verification and notational techniques to: a) define
                  mechanized properties that can be easily understood
                  by logicians and database researchers and b) attain
                  formal verification with limited effort. Our work is
                  the first step towards a unified,
                  machine-verified, formal framework for dynamic graph
                  query languages and their evaluation engines.}
}
